#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
    enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.
 -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
M*/

/* Bind or unbind this matrix to the CPU by forwarding the flag to both the
   diagonal (a->A) and off-diagonal (a->B) sequential blocks. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  /* the bound-to-cpu flag is only meaningful when a GPU backend is configured */
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Propagate row/column block sizes to the sequential blocks; the off-diagonal
   block B always gets column blocksize 1 (its columns are compacted global columns). */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Create an IS (in *keptrows) of the locally owned rows that contain at least one
   stored nonzero value; *keptrows is left NULL when no process has an all-zero row.
   A row is "kept" if either its diagonal-block or off-diagonal-block part has a
   nonzero stored value; cnt counts the local all-zero rows. */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  /* ensure the values are up-to-date on the host before reading them directly */
  ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
  /* first pass: count local all-zero rows in cnt */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  /* n0rows = global number of all-zero rows; if none, keep *keptrows == NULL */
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
  if (!n0rows) {
    ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /* second pass: collect the global indices of the kept (nonzero) rows */
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Set the diagonal of Y from the vector D; fast path goes directly to the
   diagonal block when Y is assembled and its row/column layouts are congruent. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Create an IS (in *zrows) of the global indices of locally owned rows whose
   diagonal entry is zero; the search is delegated to the sequential diagonal block
   and the resulting local indices are shifted by the ownership offset. */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  /* convert local row numbers to global row numbers */
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Compute a per-column reduction (1/2/inf norm, sum or mean of real/imaginary parts)
   over all rows of the parallel matrix.  NOTE: every rank allocates a work array of
   the GLOBAL column count n, so this routine is not memory-scalable. */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  /* get-then-restore forces any device-resident values back to the host before
     the raw arrays a_aij->a / b_aij->a are read below */
  ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  /* diagonal-block columns are offset by cmap->rstart; off-diagonal columns are
     mapped back to global numbering through garray */
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine the per-rank partial results: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* mean = column sum divided by the global number of rows m */
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

/* Create an IS (in *is) of the locally owned rows that have an entry outside the
   block diagonal: the union of the off-block-diagonal rows of the diagonal block
   and the nonzero rows of the off-diagonal block, sorted with duplicates removed. */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  /* concatenate both index lists, then sort and remove duplicates */
  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  /* convert local row numbers to global row numbers */
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each processor
   has an order N integer array but is fast to access).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash table: maps global column+1 -> local column+1 (offset by one so that 0 can mean "absent") */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense array of global length N: colmap[global] = local+1, 0 means "absent" */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/* Insert/add (row,col,value) into the diagonal SeqAIJ block: binary search within the
   row (rp1/ap1), update in place if the column exists, otherwise (if allowed by nonew)
   reallocate and shift the row to make room.  Relies on the local variables declared
   in MatSetValues_MPIAIJ(). */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          inserted = PETSC_TRUE; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

/* Same insertion logic as MatSetValues_SeqAIJ_A_Private, but for the off-diagonal
   SeqAIJ block (rp2/ap2/nrow2/... and bilen). */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else              low2  = t;                        \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        inserted = PETSC_TRUE;                            \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

/* Overwrite the values of one locally owned row, given the values v[] in global
   column order: first the off-diagonal entries left of the diagonal block, then
   the diagonal-block entries, then the off-diagonal entries to the right.
   The sparsity pattern of the row must already match v's layout. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  /* the host copy is now the authoritative one if anything was actually written */
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/* Insert or add an m-by-n logically dense block of values: locally owned rows go
   directly into the diagonal (A) or off-diagonal (B) sequential block via the
   macros above; off-process rows are stashed for communication at assembly time. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *aa,*ba;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_DEVICE)
  /* pull values back to the host before modifying the raw arrays below */
  if (A->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
  }
  if (B->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
  }
#endif
  aa = a->a;
  ba = b->a;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row indices are silently ignored */
    if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column lies in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          /* column lies in the off-diagonal block */
          if (mat->was_assembled) {
            /* after a previous assembly B stores compacted local columns; translate
               the global column through the colmap (built lazily here) */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* brand-new off-diagonal column: fall back to global numbering in B */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];  /* before first assembly B uses global column numbers */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      /* off-process row: stash the values for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /*  Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;  /* diagonal block stores local column numbers */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];           /* off-diagonal block keeps global column numbers here */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij   = (Mat_MPIAIJ*)mat->data;
  Mat         A      = aij->A; /* diagonal part of the matrix */
  Mat         B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b     = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/* Retrieve an m-by-n block of values into v (row-major).  Only locally owned rows
   are supported; off-diagonal columns are translated through the colmap and absent
   entries are returned as 0.0. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend =
                                                   mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column is in the diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: map global -> local through colmap (built lazily) */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not present in the off-diagonal block => value is 0 */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Start communicating the stashed off-process entries; a no-op when stashing is
   disabled or no off-process entries are allowed. */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Finish assembly: drain the stash of off-process values into the local blocks,
   assemble the diagonal and off-diagonal sequential matrices, reach consensus on
   disassembly across ranks, and (on first final assembly) set up the scatter
   needed for MatMult. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_DEVICE)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row data becomes stale after assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

/* Zero all stored values in both sequential blocks (pattern is kept). */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
MatZeroEntries(l->A);CHKERRQ(ierr); 773 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 774 PetscFunctionReturn(0); 775 } 776 777 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 778 { 779 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 780 PetscObjectState sA, sB; 781 PetscInt *lrows; 782 PetscInt r, len; 783 PetscBool cong, lch, gch; 784 PetscErrorCode ierr; 785 786 PetscFunctionBegin; 787 /* get locally owned rows */ 788 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 789 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 790 /* fix right hand side if needed */ 791 if (x && b) { 792 const PetscScalar *xx; 793 PetscScalar *bb; 794 795 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 796 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 797 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 798 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 799 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 800 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 801 } 802 803 sA = mat->A->nonzerostate; 804 sB = mat->B->nonzerostate; 805 806 if (diag != 0.0 && cong) { 807 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 808 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 810 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 811 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 812 PetscInt nnwA, nnwB; 813 PetscBool nnzA, nnzB; 814 815 nnwA = aijA->nonew; 816 nnwB = aijB->nonew; 817 nnzA = aijA->keepnonzeropattern; 818 nnzB = aijB->keepnonzeropattern; 819 if (!nnzA) { 820 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 821 aijA->nonew = 0; 822 } 823 if (!nnzB) { 824 ierr = 
PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    /* restore the saved insertion-mode flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/* Zero the listed rows and the matching columns, optionally placing diag on the
   diagonal and updating b for the eliminated columns (b -= B*x contributions).
   Row indices are global; each rank may list rows it does not own. */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix: build a mask of eliminated columns by
     scattering a 0/1 indicator vector into the ghost layout */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
/* y = A*x for the parallel AIJ matrix: start the scatter of off-process
   entries of x, multiply with the local diagonal block while the messages
   are in flight, then add in the off-diagonal contribution. */
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  /* overlap communication of ghost values with the local product */
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Apply only the local diagonal block: xx = diag-block(A)*bb */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A*x with the same communication/computation overlap as MatMult */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* y = A^T*x: the transposed off-diagonal block is applied into the ghost
   vector, and the result is scattered back (reverse scatter) and summed
   into the local part. */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr =
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Test whether Bmat equals the transpose of Amat to tolerance tol.
   Collective: the result *f is reduced so it is identical on all ranks. */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global indices outside this rank's ownership range;
     NOTE(review): the mix of M and N assumes a square matrix -- confirm */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symmetry test implemented as a transpose test of A against itself */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A^T*x, analogous to MatMultTranspose_MPIAIJ */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is
the 1096 diagonal block 1097 */ 1098 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1099 { 1100 PetscErrorCode ierr; 1101 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1102 1103 PetscFunctionBegin; 1104 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1105 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1106 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1117 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1122 { 1123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 #if defined(PETSC_USE_LOG) 1128 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1129 #endif 1130 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1131 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1132 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1133 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1134 #if defined(PETSC_USE_CTABLE) 1135 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1136 #else 1137 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1138 #endif 1139 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1140 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1141 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1142 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1143 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1144 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1145 1146 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1147 ierr = 
PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1148 1149 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1154 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1156 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1157 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1158 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1159 #if defined(PETSC_HAVE_CUDA) 1160 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1161 #endif 1162 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1164 #endif 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 1166 #if defined(PETSC_HAVE_ELEMENTAL) 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1168 #endif 1169 #if defined(PETSC_HAVE_SCALAPACK) 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1171 #endif 1172 #if defined(PETSC_HAVE_HYPRE) 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1174 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1175 #endif 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1179 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1181 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1182 #if defined(PETSC_HAVE_MKL_SPARSE) 1183 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1184 #endif 1185 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1186 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1187 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1192 { 1193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1194 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1195 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1196 const PetscInt *garray = aij->garray; 1197 const PetscScalar *aa,*ba; 1198 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1199 PetscInt *rowlens; 1200 PetscInt *colidxs; 1201 PetscScalar *matvals; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1206 1207 M = mat->rmap->N; 1208 N = mat->cmap->N; 1209 m = mat->rmap->n; 1210 rs = mat->rmap->rstart; 1211 cs = mat->cmap->rstart; 1212 nz = 
A->nz + B->nz; 1213 1214 /* write matrix header */ 1215 header[0] = MAT_FILE_CLASSID; 1216 header[1] = M; header[2] = N; header[3] = nz; 1217 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1218 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1219 1220 /* fill in and store row lengths */ 1221 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1222 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1223 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1224 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1225 1226 /* fill in and store column indices */ 1227 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1228 for (cnt=0, i=0; i<m; i++) { 1229 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1230 if (garray[B->j[jb]] > cs) break; 1231 colidxs[cnt++] = garray[B->j[jb]]; 1232 } 1233 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1234 colidxs[cnt++] = A->j[ja] + cs; 1235 for (; jb<B->i[i+1]; jb++) 1236 colidxs[cnt++] = garray[B->j[jb]]; 1237 } 1238 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1239 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1240 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1241 1242 /* fill in and store nonzero values */ 1243 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1244 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1245 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1246 for (cnt=0, i=0; i<m; i++) { 1247 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1248 if (garray[B->j[jb]] > cs) break; 1249 matvals[cnt++] = ba[jb]; 1250 } 1251 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1252 matvals[cnt++] = aa[ja]; 1253 for (; jb<B->i[i+1]; jb++) 1254 matvals[cnt++] = ba[jb]; 1255 } 1256 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1257 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1258 if (cnt != nz) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1259 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1260 ierr = PetscFree(matvals);CHKERRQ(ierr); 1261 1262 /* write block size option to the viewer's .info file */ 1263 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #include <petscdraw.h> 1268 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1269 { 1270 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1271 PetscErrorCode ierr; 1272 PetscMPIInt rank = aij->rank,size = aij->size; 1273 PetscBool isdraw,iascii,isbinary; 1274 PetscViewer sviewer; 1275 PetscViewerFormat format; 1276 1277 PetscFunctionBegin; 1278 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1279 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1280 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1281 if (iascii) { 1282 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1283 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1284 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1285 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1286 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1287 for (i=0; i<(PetscInt)size; i++) { 1288 nmax = PetscMax(nmax,nz[i]); 1289 nmin = PetscMin(nmin,nz[i]); 1290 navg += nz[i]; 1291 } 1292 ierr = PetscFree(nz);CHKERRQ(ierr); 1293 navg = navg/size; 1294 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1298 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1299 
MatInfo   info;
      PetscInt *inodes=NULL;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch looks unreachable -- when iascii is true the
       first branch of this if/else chain is taken; confirm before removing */
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
    /* The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (rank == 0) {
       ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
       A    = AA[0];
       Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
    */
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Top-level MPIAIJ viewer: dispatches the supported viewer types to the
   worker above; other viewer types are silently ignored. */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Local SOR/relaxation: each sweep updates the rhs with the off-process
   coupling (bb1 = bb - B*x) and then runs the sequential SOR kernel on the
   diagonal block; Eisenstat's trick is handled as a special case. */
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = NULL;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* a work rhs is needed unless a single zero-initial-guess sweep suffices
     (note the deliberate bitwise ~flag test for the absence of the bit) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* lazily build and cache the diagonal for the pointwise product */
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* Produce *B = P_r A P_c for row/column permutations rowp/colp by inverting
   the permutations with PetscSF, counting the permuted diagonal/off-diagonal
   nonzeros per row, and inserting into a freshly preallocated matrix. */
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr =
ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1549 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1550 1551 /* Find out where my gcols should go */ 1552 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1553 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1554 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1555 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1556 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1557 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1558 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1559 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1560 1561 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1562 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1563 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1564 for (i=0; i<m; i++) { 1565 PetscInt row = rdest[i]; 1566 PetscMPIInt rowner; 1567 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1568 for (j=ai[i]; j<ai[i+1]; j++) { 1569 PetscInt col = cdest[aj[j]]; 1570 PetscMPIInt cowner; 1571 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1572 if (rowner == cowner) dnnz[i]++; 1573 else onnz[i]++; 1574 } 1575 for (j=bi[i]; j<bi[i+1]; j++) { 1576 PetscInt col = gcdest[bj[j]]; 1577 PetscMPIInt cowner; 1578 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1579 if (rowner == cowner) dnnz[i]++; 1580 else onnz[i]++; 1581 } 1582 } 1583 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1585 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1586 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1587 ierr = 
PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1588 1589 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1590 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1591 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1592 for (i=0; i<m; i++) { 1593 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1594 PetscInt j0,rowlen; 1595 rowlen = ai[i+1] - ai[i]; 1596 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1597 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1598 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1599 } 1600 rowlen = bi[i+1] - bi[i]; 1601 for (j0=j=0; j<rowlen; j0=j) { 1602 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1603 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1604 } 1605 } 1606 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1607 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1608 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1609 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1610 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1611 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1612 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1613 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1614 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1615 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1616 *B = Aperm; 1617 PetscFunctionReturn(0); 1618 } 1619 1620 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1621 { 1622 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1623 PetscErrorCode ierr; 1624 1625 PetscFunctionBegin; 1626 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1627 if (ghosts) *ghosts = 
aij->garray;
  PetscFunctionReturn(0);
}

/* Collect matrix statistics (nonzeros, memory, mallocs) by summing over the
   diagonal (A) and off-diagonal (B) blocks, then optionally reducing over the
   communicator with MAX or SUM depending on flag. */
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  /* accumulate A-part and B-part statistics before any global reduction */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Forward options to the A and B blocks, or record them locally, as appropriate.
   Options that only make sense during assembly/preallocation are applied to both blocks. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/* Return one locally-owned row with global column indices, merging the diagonal (A)
   and off-diagonal (B) pieces into a single sorted row in mat->rowvalues/rowindices. */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive)
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* pass NULL for pieces the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* imark was not computed above (v == NULL); find the split point here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* End a MatGetRow()/MatRestoreRow() pair; only clears the active flag (the work
   arrays in mat->rowvalues/rowindices are kept for reuse). */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Compute Frobenius, 1- (max column sum) or infinity- (max row sum) norm of the
   parallel matrix; the 2-norm is not supported. */
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* tmp accumulates local column sums over all global columns, then is reduced */
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

/* Transpose a parallel AIJ matrix. The diagonal block is transposed locally in place;
   the off-diagonal block entries are scattered to their new owners with MatSetValues(). */
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate compressed local column numbers to global ones */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* one row of A becomes one column of B; MatSetValues handles off-process targets */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Scale the matrix as diag(ll) * mat * diag(rr); either vector may be NULL.
   The right scaling of the off-diagonal block needs the ghost values of rr. */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
 */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Clear any factored state; only the diagonal block can be factored here. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Compare two MPIAIJ matrices blockwise; the result is made globally consistent
   with a logical-AND reduction over the communicator. */
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  PetscFunctionReturn(0);
}

/* Copy values of A into B; falls back to the slow generic copy when the fast
   blockwise copy is not applicable. */
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Default MatSetUp(): preallocate with default nnz estimates. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix: merge-count the sorted global
     column lists of row i of X and row i of Y, counting duplicates once */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Y = a*X + Y. The cheap blockwise path is used when the patterns match; otherwise a
   new matrix with the merged pattern is preallocated and Y is replaced in place. */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
    ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr =
PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2123 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2124 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2125 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2126 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2127 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2128 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2129 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2130 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2131 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2132 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2133 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2134 } 2135 PetscFunctionReturn(0); 2136 } 2137 2138 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2139 2140 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2141 { 2142 #if defined(PETSC_USE_COMPLEX) 2143 PetscErrorCode ierr; 2144 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2145 2146 PetscFunctionBegin; 2147 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2148 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2149 #else 2150 PetscFunctionBegin; 2151 #endif 2152 PetscFunctionReturn(0); 2153 } 2154 2155 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2156 { 2157 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2158 PetscErrorCode ierr; 2159 2160 PetscFunctionBegin; 2161 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2162 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2167 { 2168 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2173 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2178 { 2179 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 2180 PetscErrorCode ierr; 2181 PetscInt i,*idxb = NULL,m = A->rmap->n; 2182 PetscScalar *va,*vv; 2183 Vec vB,vA; 2184 const PetscScalar *vb; 2185 2186 PetscFunctionBegin; 2187 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2188 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2189 2190 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2191 if (idx) { 2192 for (i=0; i<m; i++) { 2193 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2194 } 2195 } 2196 2197 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2198 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2199 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2200 2201 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2202 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2203 for (i=0; i<m; i++) { 2204 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2205 vv[i] = vb[i]; 2206 if (idx) idx[i] = a->garray[idxb[i]]; 2207 } else { 2208 vv[i] = va[i]; 2209 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2210 idx[i] = a->garray[idxb[i]]; 2211 } 2212 } 2213 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2214 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2215 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2216 ierr = PetscFree(idxb);CHKERRQ(ierr); 2217 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2218 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2219 PetscFunctionReturn(0); 2220 } 2221 2222 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2223 { 2224 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2225 PetscInt m = A->rmap->n,n = A->cmap->n; 2226 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2227 PetscInt *cmap = mat->garray; 2228 PetscInt *diagIdx, *offdiagIdx; 2229 Vec diagV, offdiagV; 2230 PetscScalar *a, *diagA, *offdiagA; 2231 const PetscScalar *ba,*bav; 2232 PetscInt r,j,col,ncols,*bi,*bj; 2233 PetscErrorCode ierr; 2234 Mat B = mat->B; 2235 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2236 2237 
  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: every row minimum (in absolute value) is an implicit 0.0 */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the minimum in absolute value is the implicit 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan explicit B entries of this row for a smaller |value| */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal-block minima; ties prefer the smaller column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* For each local row, return the minimum entry (by real part) and, optionally,
   its global column index; implicit zeros in the compressed off-diagonal part count. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b =
(Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: report PETSC_MAX_REAL so any real entry wins elsewhere */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 is a candidate minimum */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan explicit B entries of this row for a smaller value (compared by real part) */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal-block minima; ties prefer the smaller column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* For each local row, return the maximum entry and, optionally, its global column index. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B =
mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns at all: the row maximum over an empty set is -infinity */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* compare the stored B entries of this row against the implicit-zero baseline */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block maxima; ties prefer the smaller global column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetSeqNonzeroStructure_MPIAIJ - gathers the nonzero structure (no values) of the
   whole parallel matrix into a sequential matrix via MatCreateSubMatrix_MPIAIJ_All().
   Ownership of the returned matrix passes to the caller; only the holder array is freed.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatInvertBlockDiagonal_MPIAIJ - delegates to the local diagonal block; signature
   continues on the next chunk line. */
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar
**values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* block-diagonal inversion only touches the on-process diagonal block a->A */
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype; /* propagate any factorization failure */
  PetscFunctionReturn(0);
}

/*
   MatSetRandom_MPIAIJ - fills the preallocated nonzero pattern with random values.
   For an unassembled (but preallocated) matrix the off-diagonal block has no column
   map yet, so a helper that skips the diagonal column range is used instead.
*/
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  if (x->assembled) {
    ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  } else {
    ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Implementation backing MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the
   increaseoverlap function pointer between the scalable and default algorithms. */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PetscTryMethod: silently a no-op for matrix types that do not provide the method */
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Options handler: currently only -mat_increase_overlap_scalable. */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatShift_MPIAIJ - Y += a*I.  Ensures the diagonal block has room for one entry per
   row first (empty/unpreallocated matrices would otherwise reject the insertions),
   preserving the nonew flag across the re-preallocation.
*/
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMissingDiagonal_MPIAIJ - reports whether a diagonal entry is absent, checking only
   the local diagonal block; d (if requested) is shifted to the global row number.
*/
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d += rstart;

  }
  PetscFunctionReturn(0);
}

/* MatInvertVariableBlockDiagonal_MPIAIJ - delegates variable-size block-diagonal
   inversion to the local diagonal block (body continues on the next chunk line). */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ     *a =
(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ.  Positions are significant: each slot
   corresponds to a fixed entry of struct _MatOps (the /*NN*/ markers give the
   slot index); NULL means the operation is unimplemented for this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};

/* ----------------------------------------------------------------------------------------*/

/* MatStoreValues_MPIAIJ - snapshots the numerical values of both local blocks
   (for later restoration with MatRetrieveValues). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatRetrieveValues_MPIAIJ - restores values saved by MatStoreValues_MPIAIJ
   (body continues on the next chunk line). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMPIAIJSetPreallocation_MPIAIJ - (re)preallocates the two local SEQAIJ blocks.
   d_nz/d_nnz describe the diagonal block (columns owned by this process),
   o_nz/o_nnz the off-diagonal block.  Any previous column map, scatter context
   and off-diagonal block are discarded, since their sizes may have changed.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* uniprocess: the off-diagonal block is empty (0 columns) */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   MatResetPreallocation_MPIAIJ - returns the matrix to a just-preallocated state,
   dropping the column map/scatter and resetting both local blocks in place.
*/
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* MatDuplicate_MPIAIJ - deep copy of an MPIAIJ matrix (body continues on the
   next chunk line). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode
ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-object scratch used by MatGetRow; not copied, rebuilt on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* copy the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx) {
    ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  }
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/*
   MatLoad_MPIAIJ - loads the matrix from a viewer, dispatching on viewer type
   (binary or HDF5).  Closing return is on the next chunk line.
*/
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool      isbinary, ishdf5;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
  if (isbinary) {
    ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/*
   MatLoad_MPIAIJ_Binary - reads a matrix in PETSc binary format: a 4-int header
   (classid, M, N, nz), then per-row lengths, column indices and values, each read
   collectively with PetscViewerBinaryReadAll().  Row lengths are prefix-summed
   into CSR row pointers and handed to MatMPIAIJSetPreallocationCSR().
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
  if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
  if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);

  /* read in row lengths and build row indices */
  ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  /* in-place prefix sum: lengths -> CSR row offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
  if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  /* store matrix indices and values */
  ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns.
*/ 3070 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3071 { 3072 PetscErrorCode ierr; 3073 IS iscol_local; 3074 PetscBool isstride; 3075 PetscMPIInt lisstride=0,gisstride; 3076 3077 PetscFunctionBegin; 3078 /* check if we are grabbing all columns*/ 3079 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3080 3081 if (isstride) { 3082 PetscInt start,len,mstart,mlen; 3083 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3084 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3085 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3086 if (mstart == start && mlen-mstart == len) lisstride = 1; 3087 } 3088 3089 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3090 if (gisstride) { 3091 PetscInt N; 3092 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3093 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3094 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3095 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3096 } else { 3097 PetscInt cbs; 3098 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3099 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3100 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3101 } 3102 3103 *isseq = iscol_local; 3104 PetscFunctionReturn(0); 3105 } 3106 3107 /* 3108 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3109 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3110 3111 Input Parameters: 3112 mat - matrix 3113 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3114 i.e., mat->rstart <= isrow[i] < mat->rend 3115 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3116 i.e., mat->cstart <= iscol[i] < mat->cend 3117 Output Parameter: 3118 isrow_d,iscol_d - sequential row and column 
index sets for retrieving mat->A
     iscol_o - sequential column index set for retrieving mat->B
     garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  isstart -= ncols; /* exclusive prefix sum: global offset of this process's iscol entries */
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* -1 padding marks columns not selected by iscol */
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  *garray = cmap1; /* caller takes ownership and must PetscFree() it */

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
    if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
    if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
    if (n) {
3250 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3251 } 3252 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3253 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3254 3255 } else { /* call == MAT_INITIAL_MATRIX) */ 3256 const PetscInt *garray; 3257 PetscInt BsubN; 3258 3259 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3260 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3261 3262 /* Create local submatrices Asub and Bsub */ 3263 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3264 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3265 3266 /* Create submatrix M */ 3267 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3268 3269 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3270 asub = (Mat_MPIAIJ*)M->data; 3271 3272 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3273 n = asub->B->cmap->N; 3274 if (BsubN > n) { 3275 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3276 const PetscInt *idx; 3277 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3278 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3279 3280 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3281 j = 0; 3282 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3283 for (i=0; i<n; i++) { 3284 if (j >= BsubN) break; 3285 while (subgarray[i] > garray[j]) j++; 3286 3287 if (subgarray[i] == garray[j]) { 3288 idx_new[i] = idx[j++]; 3289 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3290 } 3291 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3292 3293 
ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3294 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3295 3296 } else if (BsubN < n) { 3297 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3298 } 3299 3300 ierr = PetscFree(garray);CHKERRQ(ierr); 3301 *submat = M; 3302 3303 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3304 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3305 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3306 3307 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3308 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3309 3310 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3311 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3312 } 3313 PetscFunctionReturn(0); 3314 } 3315 3316 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3317 { 3318 PetscErrorCode ierr; 3319 IS iscol_local=NULL,isrow_d; 3320 PetscInt csize; 3321 PetscInt n,i,j,start,end; 3322 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3323 MPI_Comm comm; 3324 3325 PetscFunctionBegin; 3326 /* If isrow has same processor distribution as mat, 3327 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3328 if (call == MAT_REUSE_MATRIX) { 3329 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3330 if (isrow_d) { 3331 sameRowDist = PETSC_TRUE; 3332 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3333 } else { 3334 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3335 if (iscol_local) { 3336 sameRowDist = PETSC_TRUE; 3337 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3338 } 3339 } 3340 } else { 3341 /* Check if isrow has same processor distribution as mat */ 3342 
sameDist[0] = PETSC_FALSE; 3343 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3344 if (!n) { 3345 sameDist[0] = PETSC_TRUE; 3346 } else { 3347 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3348 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3349 if (i >= start && j < end) { 3350 sameDist[0] = PETSC_TRUE; 3351 } 3352 } 3353 3354 /* Check if iscol has same processor distribution as mat */ 3355 sameDist[1] = PETSC_FALSE; 3356 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3357 if (!n) { 3358 sameDist[1] = PETSC_TRUE; 3359 } else { 3360 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3361 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3362 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3363 } 3364 3365 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3366 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3367 sameRowDist = tsameDist[0]; 3368 } 3369 3370 if (sameRowDist) { 3371 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3372 /* isrow and iscol have same processor distribution as mat */ 3373 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3374 PetscFunctionReturn(0); 3375 } else { /* sameRowDist */ 3376 /* isrow has same processor distribution as mat */ 3377 if (call == MAT_INITIAL_MATRIX) { 3378 PetscBool sorted; 3379 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3380 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3381 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3382 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3383 3384 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3385 if (sorted) { 3386 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3387 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3388 PetscFunctionReturn(0); 3389 } 3390 } else { /* call == MAT_REUSE_MATRIX */ 3391 IS iscol_sub; 3392 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3393 if (iscol_sub) { 3394 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3395 PetscFunctionReturn(0); 3396 } 3397 } 3398 } 3399 } 3400 3401 /* General case: iscol -> iscol_local which has global size of iscol */ 3402 if (call == MAT_REUSE_MATRIX) { 3403 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3404 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3405 } else { 3406 if (!iscol_local) { 3407 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3408 } 3409 } 3410 3411 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3412 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3413 3414 if (call == MAT_INITIAL_MATRIX) { 3415 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3416 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3417 } 3418 PetscFunctionReturn(0); 3419 } 3420 3421 /*@C 3422 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3423 and "off-diagonal" part of the matrix in CSR format. 3424 3425 Collective 3426 3427 Input Parameters: 3428 + comm - MPI communicator 3429 . A - "diagonal" portion of matrix 3430 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3431 - garray - global index of B columns 3432 3433 Output Parameter: 3434 . 
mat - the matrix, with input A as its local diagonal matrix 3435 Level: advanced 3436 3437 Notes: 3438 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3439 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3440 3441 .seealso: MatCreateMPIAIJWithSplitArrays() 3442 @*/ 3443 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3444 { 3445 PetscErrorCode ierr; 3446 Mat_MPIAIJ *maij; 3447 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3448 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3449 const PetscScalar *oa; 3450 Mat Bnew; 3451 PetscInt m,n,N; 3452 3453 PetscFunctionBegin; 3454 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3455 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3456 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3457 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3458 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3459 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3460 3461 /* Get global columns of mat */ 3462 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3463 3464 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3465 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3466 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3467 maij = (Mat_MPIAIJ*)(*mat)->data; 3468 3469 (*mat)->preallocated = PETSC_TRUE; 3470 3471 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3472 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3473 3474 /* Set A as diagonal portion of *mat */ 3475 maij->A = A; 3476 3477 nz = oi[m]; 3478 for (i=0; i<nz; i++) { 3479 col = oj[i]; 3480 
oj[i] = garray[col]; 3481 } 3482 3483 /* Set Bnew as off-diagonal portion of *mat */ 3484 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3485 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3486 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3487 bnew = (Mat_SeqAIJ*)Bnew->data; 3488 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3489 maij->B = Bnew; 3490 3491 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3492 3493 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3494 b->free_a = PETSC_FALSE; 3495 b->free_ij = PETSC_FALSE; 3496 ierr = MatDestroy(&B);CHKERRQ(ierr); 3497 3498 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3499 bnew->free_a = PETSC_TRUE; 3500 bnew->free_ij = PETSC_TRUE; 3501 3502 /* condense columns of maij->B */ 3503 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3504 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3505 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3506 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3507 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3508 PetscFunctionReturn(0); 3509 } 3510 3511 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3512 3513 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3514 { 3515 PetscErrorCode ierr; 3516 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3517 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3518 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3519 Mat M,Msub,B=a->B; 3520 MatScalar *aa; 3521 Mat_SeqAIJ *aij; 3522 PetscInt *garray = a->garray,*colsub,Ncols; 3523 PetscInt 
count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the objects stashed on *newmat by the MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* collective agreement: all ranks must take the same branch */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr  = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr  = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr  = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: keep j only if it appears in garray (sorted scan with cursor k) */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* collective prefix sum establishes each rank's column ownership range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* single allocation holds both dlens and olens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj   = aij->j;
  ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; /* translate Msub columns to newmat columns */
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* all ranks must agree on allcolumns before the collective extraction below */
  ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* collective prefix sum yields this rank's [rstart,rend) column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* dlens and olens share one allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Preallocate and fill an MPIAIJ matrix from per-rank CSR arrays Ii/J/v (local rows, global column indices) */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr =
PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3875 3876 if (PetscDefined(USE_DEBUG)) { 3877 for (i=0; i<m; i++) { 3878 nnz = Ii[i+1]- Ii[i]; 3879 JJ = J + Ii[i]; 3880 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3881 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3882 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3883 } 3884 } 3885 3886 for (i=0; i<m; i++) { 3887 nnz = Ii[i+1]- Ii[i]; 3888 JJ = J + Ii[i]; 3889 nnz_max = PetscMax(nnz_max,nnz); 3890 d = 0; 3891 for (j=0; j<nnz; j++) { 3892 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3893 } 3894 d_nnz[i] = d; 3895 o_nnz[i] = nnz - d; 3896 } 3897 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3898 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3899 3900 for (i=0; i<m; i++) { 3901 ii = i + rstart; 3902 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3903 } 3904 nooffprocentries = B->nooffprocentries; 3905 B->nooffprocentries = PETSC_TRUE; 3906 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3907 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3908 B->nooffprocentries = nooffprocentries; 3909 3910 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3911 PetscFunctionReturn(0); 3912 } 3913 3914 /*@ 3915 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3916 (the default parallel PETSc format). 3917 3918 Collective 3919 3920 Input Parameters: 3921 + B - the matrix 3922 . i - the indices into j for the start of each local row (starts with zero) 3923 . 
j - the column indices for each local row (starts with zero) 3924 - v - optional values in the matrix 3925 3926 Level: developer 3927 3928 Notes: 3929 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3930 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3931 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3932 3933 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3934 3935 The format which is used for the sparse matrix input, is equivalent to a 3936 row-major ordering.. i.e for the following matrix, the input data expected is 3937 as shown 3938 3939 $ 1 0 0 3940 $ 2 0 3 P0 3941 $ ------- 3942 $ 4 5 6 P1 3943 $ 3944 $ Process0 [P0]: rows_owned=[0,1] 3945 $ i = {0,1,3} [size = nrow+1 = 2+1] 3946 $ j = {0,0,2} [size = 3] 3947 $ v = {1,2,3} [size = 3] 3948 $ 3949 $ Process1 [P1]: rows_owned=[2] 3950 $ i = {0,3} [size = nrow+1 = 1+1] 3951 $ j = {0,1,2} [size = 3] 3952 $ v = {4,5,6} [size = 3] 3953 3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3955 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3956 @*/ 3957 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3958 { 3959 PetscErrorCode ierr; 3960 3961 PetscFunctionBegin; 3962 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3963 PetscFunctionReturn(0); 3964 } 3965 3966 /*@C 3967 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3968 (the default parallel PETSc format). For good matrix assembly performance 3969 the user should preallocate the matrix storage by setting the parameters 3970 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 3971 performance can be increased by more than a factor of 50. 3972 3973 Collective 3974 3975 Input Parameters: 3976 + B - the matrix 3977 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3978 (same value is used for all local rows) 3979 . d_nnz - array containing the number of nonzeros in the various rows of the 3980 DIAGONAL portion of the local submatrix (possibly different for each row) 3981 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3982 The size of this array is equal to the number of local rows, i.e 'm'. 3983 For matrices that will be factored, you must leave room for (and set) 3984 the diagonal entry even if it is zero. 3985 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3986 submatrix (same value is used for all local rows). 3987 - o_nnz - array containing the number of nonzeros in the various rows of the 3988 OFF-DIAGONAL portion of the local submatrix (possibly different for 3989 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3990 structure. The size of this array is equal to the number 3991 of local rows, i.e 'm'. 3992 3993 If the *_nnz parameter is given then the *_nz parameter is ignored 3994 3995 The AIJ format (also called the Yale sparse matrix format or 3996 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3997 storage. The stored row and column indices begin with zero. 3998 See Users-Manual: ch_mat for details. 3999 4000 The parallel matrix is partitioned such that the first m0 rows belong to 4001 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4002 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
4003 4004 The DIAGONAL portion of the local submatrix of a processor can be defined 4005 as the submatrix which is obtained by extraction the part corresponding to 4006 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4007 first row that belongs to the processor, r2 is the last row belonging to 4008 the this processor, and c1-c2 is range of indices of the local part of a 4009 vector suitable for applying the matrix to. This is an mxn matrix. In the 4010 common case of a square matrix, the row and column ranges are the same and 4011 the DIAGONAL part is also square. The remaining portion of the local 4012 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4013 4014 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4015 4016 You can call MatGetInfo() to get information on how effective the preallocation was; 4017 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4018 You can also run with the option -info and look for messages with the string 4019 malloc in them to see if additional memory allocation was needed. 4020 4021 Example usage: 4022 4023 Consider the following 8x8 matrix with 34 non-zero values, that is 4024 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4025 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4026 as follows: 4027 4028 .vb 4029 1 2 0 | 0 3 0 | 0 4 4030 Proc0 0 5 6 | 7 0 0 | 8 0 4031 9 0 10 | 11 0 0 | 12 0 4032 ------------------------------------- 4033 13 0 14 | 15 16 17 | 0 0 4034 Proc1 0 18 0 | 19 20 21 | 0 0 4035 0 0 0 | 22 23 0 | 24 0 4036 ------------------------------------- 4037 Proc2 25 26 27 | 0 0 28 | 29 0 4038 30 0 0 | 31 32 33 | 0 34 4039 .ve 4040 4041 This can be represented as a collection of submatrices as: 4042 4043 .vb 4044 A B C 4045 D E F 4046 G H I 4047 .ve 4048 4049 Where the submatrices A,B,C are owned by proc0, D,E,F are 4050 owned by proc1, G,H,I are owned by proc2. 

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Forward to the type-specific implementation composed under "MatMPIAIJSetPreallocation_C";
     PetscTryMethod() silently does nothing if B's type has not composed that method,
     so calling this on a non-MPIAIJ matrix is a safe no-op. */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input, is equivalent to a
   row-major ordering, i.e. for the following matrix, the input data expected is
   as shown

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* i may be NULL only for an empty local block; a nonzero i[0] means the caller passed 1-based indices */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* the CSR triplet is copied into the matrix here; the caller's arrays are not retained */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.
Only the numerical values are updated the other arrays must be identical 4178 4179 Collective 4180 4181 Input Parameters: 4182 + mat - the matrix 4183 . m - number of local rows (Cannot be PETSC_DECIDE) 4184 . n - This value should be the same as the local size used in creating the 4185 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4186 calculated if N is given) For square matrices n is almost always m. 4187 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4188 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4189 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4190 . J - column indices 4191 - v - matrix values 4192 4193 Level: intermediate 4194 4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4196 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4197 @*/ 4198 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4199 { 4200 PetscErrorCode ierr; 4201 PetscInt cstart,nnz,i,j; 4202 PetscInt *ld; 4203 PetscBool nooffprocentries; 4204 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4205 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4206 PetscScalar *ad = Ad->a, *ao = Ao->a; 4207 const PetscInt *Adi = Ad->i; 4208 PetscInt ldi,Iii,md; 4209 4210 PetscFunctionBegin; 4211 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4212 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4213 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4214 if (n != mat->cmap->n) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4215 4216 cstart = mat->cmap->rstart; 4217 if (!Aij->ld) { 4218 /* count number of entries below block diagonal */ 4219 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4220 Aij->ld = ld; 4221 for (i=0; i<m; i++) { 4222 nnz = Ii[i+1]- Ii[i]; 4223 j = 0; 4224 while (J[j] < cstart && j < nnz) {j++;} 4225 J += nnz; 4226 ld[i] = j; 4227 } 4228 } else { 4229 ld = Aij->ld; 4230 } 4231 4232 for (i=0; i<m; i++) { 4233 nnz = Ii[i+1]- Ii[i]; 4234 Iii = Ii[i]; 4235 ldi = ld[i]; 4236 md = Adi[i+1]-Adi[i]; 4237 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4238 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4239 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4240 ad += md; 4241 ao += nnz - md; 4242 } 4243 nooffprocentries = mat->nooffprocentries; 4244 mat->nooffprocentries = PETSC_TRUE; 4245 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4246 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4247 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4248 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4249 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4250 mat->nooffprocentries = nooffprocentries; 4251 PetscFunctionReturn(0); 4252 } 4253 4254 /*@C 4255 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4256 (the default parallel PETSc format). For good matrix assembly performance 4257 the user should preallocate the matrix storage by setting the parameters 4258 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4259 performance can be increased by more than a factor of 50. 4260 4261 Collective 4262 4263 Input Parameters: 4264 + comm - MPI communicator 4265 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4266 This value should be the same as the local size used in creating the 4267 y vector for the matrix-vector product y = Ax. 4268 . n - This value should be the same as the local size used in creating the 4269 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4270 calculated if N is given) For square matrices n is almost always m. 4271 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4272 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4273 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4274 (same value is used for all local rows) 4275 . d_nnz - array containing the number of nonzeros in the various rows of the 4276 DIAGONAL portion of the local submatrix (possibly different for each row) 4277 or NULL, if d_nz is used to specify the nonzero structure. 4278 The size of this array is equal to the number of local rows, i.e 'm'. 4279 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4280 submatrix (same value is used for all local rows). 4281 - o_nnz - array containing the number of nonzeros in the various rows of the 4282 OFF-DIAGONAL portion of the local submatrix (possibly different for 4283 each row) or NULL, if o_nz is used to specify the nonzero 4284 structure. The size of this array is equal to the number 4285 of local rows, i.e 'm'. 4286 4287 Output Parameter: 4288 . A - the matrix 4289 4290 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4291 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
4292 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4293 4294 Notes: 4295 If the *_nnz parameter is given then the *_nz parameter is ignored 4296 4297 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4298 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4299 storage requirements for this matrix. 4300 4301 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4302 processor than it must be used on all processors that share the object for 4303 that argument. 4304 4305 The user MUST specify either the local or global matrix dimensions 4306 (possibly both). 4307 4308 The parallel matrix is partitioned across processors such that the 4309 first m0 rows belong to process 0, the next m1 rows belong to 4310 process 1, the next m2 rows belong to process 2 etc.. where 4311 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4312 values corresponding to [m x N] submatrix. 4313 4314 The columns are logically partitioned with the n0 columns belonging 4315 to 0th partition, the next n1 columns belonging to the next 4316 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4317 4318 The DIAGONAL portion of the local submatrix on any given processor 4319 is the submatrix corresponding to the rows and columns m,n 4320 corresponding to the given processor. i.e diagonal matrix on 4321 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4322 etc. The remaining portion of the local submatrix [m x (N-n)] 4323 constitute the OFF-DIAGONAL portion. The example below better 4324 illustrates this concept. 4325 4326 For a square global matrix we define each processor's diagonal portion 4327 to be its local rows and the corresponding columns (a square submatrix); 4328 each processor's off-diagonal portion encompasses the remainder of the 4329 local matrix (a rectangular submatrix). 4330 4331 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
4332 4333 When calling this routine with a single process communicator, a matrix of 4334 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4335 type of communicator, use the construction mechanism 4336 .vb 4337 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4338 .ve 4339 4340 $ MatCreate(...,&A); 4341 $ MatSetType(A,MATMPIAIJ); 4342 $ MatSetSizes(A, m,n,M,N); 4343 $ MatMPIAIJSetPreallocation(A,...); 4344 4345 By default, this format uses inodes (identical nodes) when possible. 4346 We search for consecutive rows with the same nonzero structure, thereby 4347 reusing matrix information to achieve increased efficiency. 4348 4349 Options Database Keys: 4350 + -mat_no_inode - Do not use inodes 4351 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4352 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4353 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4354 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 4355 4356 Example usage: 4357 4358 Consider the following 8x8 matrix with 34 non-zero values, that is 4359 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4360 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4361 as follows 4362 4363 .vb 4364 1 2 0 | 0 3 0 | 0 4 4365 Proc0 0 5 6 | 7 0 0 | 8 0 4366 9 0 10 | 11 0 0 | 12 0 4367 ------------------------------------- 4368 13 0 14 | 15 16 17 | 0 0 4369 Proc1 0 18 0 | 19 20 21 | 0 0 4370 0 0 0 | 22 23 0 | 24 0 4371 ------------------------------------- 4372 Proc2 25 26 27 | 0 0 28 | 29 0 4373 30 0 0 | 31 32 33 | 0 34 4374 .ve 4375 4376 This can be represented as a collection of submatrices as 4377 4378 .vb 4379 A B C 4380 D E F 4381 G H I 4382 .ve 4383 4384 Where the submatrices A,B,C are owned by proc0, D,E,F are 4385 owned by proc1, G,H,I are owned by proc2. 4386 4387 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4388 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4389 The 'M','N' parameters are 8,8, and have the same values on all procs. 4390 4391 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4392 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4393 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4394 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4395 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4396 matrix, ans [DF] as another SeqAIJ matrix. 4397 4398 When d_nz, o_nz parameters are specified, d_nz storage elements are 4399 allocated for every row of the local diagonal submatrix, and o_nz 4400 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4401 One way to choose d_nz and o_nz is to use the max nonzerors per local 4402 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4403 In this case, the values of d_nz,o_nz are 4404 .vb 4405 proc0 : dnz = 2, o_nz = 2 4406 proc1 : dnz = 3, o_nz = 2 4407 proc2 : dnz = 1, o_nz = 4 4408 .ve 4409 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4410 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4411 for proc3. 
i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    nranks;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  /* A single-rank communicator yields a SEQAIJ matrix (the off-diagonal
     preallocation arguments are then irrelevant); otherwise MPIAIJ. */
  ierr = MPI_Comm_size(comm,&nranks);CHKERRMPI(ierr);
  if (nranks == 1) {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

   Not collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.
Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.

   Level: intermediate

.seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* prefix match so that subclasses whose type name begins with "mpiaij" are accepted too */
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  /* borrowed references: the caller must NOT destroy Ad, Ao, or free colmap */
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/* Concatenates the rows of the sequential matrices held by the ranks of comm into one
   parallel matrix; n is the local column count (or PETSC_DECIDE), scall selects the
   symbolic+numeric (MAT_INITIAL_MATRIX) or numeric-only reuse path. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    if (sum != N)
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* exclusive prefix sum of local row counts gives this rank's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr);
    /* both preallocation calls are made; only the one matching *outmat's actual type takes effect */
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* numeric phase: copy the local rows of inmat into the parallel matrix */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Writes each rank's local rows of A (as a sequential [m x N] matrix) to its own
   binary file named "<outfile>.<rank>". */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  /* copy this rank's global rows [rstart, rstart+m) into local rows [0, m) of B */
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  /* len+6 leaves room for '.' + up to 4 rank digits + NUL; PetscSNPrintf truncates
     (safely) for ranks >= 10000 -- NOTE(review): confirm that is acceptable here */
  ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
  ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* PetscContainer destructor for the Mat_Merge_SeqsToMPI state attached by
   MatCreateMPIAIJSumSeqAIJSymbolic(); frees all merge buffers and the row layout. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
  ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
  ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
  ierr = PetscFree(merge->bi);CHKERRQ(ierr);
  ierr =
PetscFree(merge->bj);CHKERRQ(ierr);
  /* buf_ri/buf_rj were allocated as one slab with the pointer array; free the slab then the array */
  ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
  ierr = PetscFree(merge->coi);CHKERRQ(ierr);
  ierr = PetscFree(merge->coj);CHKERRQ(ierr);
  ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscFree(merge);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of summing per-rank sequential AIJ matrices into the parallel matrix
   mpimat; requires mpimat to have been created by MatCreateMPIAIJSumSeqAIJSymbolic(),
   which attached the Mat_Merge_SeqsToMPI communication plan as a PetscContainer. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  /* retrieve the communication plan created by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr =
PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* send [proc] the values of all seqmat rows that [proc] owns, in one message */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;     /* global row index */
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    /* merge the (sorted) seqmat row into the (sorted, superset) mpimat row pattern */
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of summing per-rank sequential AIJ matrices into one parallel matrix:
   determines the row partition, exchanges i/j structure, and builds the merged pattern. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr
= PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4748 4749 /* make sure it is a PETSc comm */ 4750 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4751 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4752 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4753 4754 ierr = PetscNew(&merge);CHKERRQ(ierr); 4755 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4756 4757 /* determine row ownership */ 4758 /*---------------------------------------------------------*/ 4759 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4760 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4761 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4762 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4763 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4764 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4765 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4766 4767 m = merge->rowmap->n; 4768 owners = merge->rowmap->range; 4769 4770 /* determine the number of messages to send, their lengths */ 4771 /*---------------------------------------------------------*/ 4772 len_s = merge->len_s; 4773 4774 len = 0; /* length of buf_si[] */ 4775 merge->nsend = 0; 4776 for (proc=0; proc<size; proc++) { 4777 len_si[proc] = 0; 4778 if (proc == rank) { 4779 len_s[proc] = 0; 4780 } else { 4781 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4782 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4783 } 4784 if (len_s[proc]) { 4785 merge->nsend++; 4786 nrows = 0; 4787 for (i=owners[proc]; i<owners[proc+1]; i++) { 4788 if (ai[i+1] > ai[i]) nrows++; 4789 } 4790 len_si[proc] = 2*(nrows+1); 4791 len += len_si[proc]; 4792 } 4793 } 4794 4795 /* determine the number and length of messages to receive for ij-structure */ 4796 /*-------------------------------------------------------------------------*/ 4797 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 
4798 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4799 4800 /* post the Irecv of j-structure */ 4801 /*-------------------------------*/ 4802 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4803 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4804 4805 /* post the Isend of j-structure */ 4806 /*--------------------------------*/ 4807 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4808 4809 for (proc=0, k=0; proc<size; proc++) { 4810 if (!len_s[proc]) continue; 4811 i = owners[proc]; 4812 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4813 k++; 4814 } 4815 4816 /* receives and sends of j-structure are complete */ 4817 /*------------------------------------------------*/ 4818 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4819 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4820 4821 /* send and recv i-structure */ 4822 /*---------------------------*/ 4823 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4824 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4825 4826 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4827 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4828 for (proc=0,k=0; proc<size; proc++) { 4829 if (!len_s[proc]) continue; 4830 /* form outgoing message for i-structure: 4831 buf_si[0]: nrows to be sent 4832 [1:nrows]: row index (global) 4833 [nrows+1:2*nrows+1]: i-structure index 4834 */ 4835 /*-------------------------------------------*/ 4836 nrows = len_si[proc]/2 - 1; 4837 buf_si_i = buf_si + nrows+1; 4838 buf_si[0] = nrows; 4839 buf_si_i[0] = 0; 4840 nrows = 0; 4841 for (i=owners[proc]; i<owners[proc+1]; i++) { 4842 anzi = ai[i+1] - ai[i]; 4843 if (anzi) { 4844 
buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4845 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4846 nrows++; 4847 } 4848 } 4849 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4850 k++; 4851 buf_si += len_si[proc]; 4852 } 4853 4854 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4855 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4856 4857 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4858 for (i=0; i<merge->nrecv; i++) { 4859 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4860 } 4861 4862 ierr = PetscFree(len_si);CHKERRQ(ierr); 4863 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4864 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4865 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4866 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4867 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4868 ierr = PetscFree(status);CHKERRQ(ierr); 4869 4870 /* compute a local seq matrix in each processor */ 4871 /*----------------------------------------------*/ 4872 /* allocate bi array and free space for accumulating nonzero column info */ 4873 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4874 bi[0] = 0; 4875 4876 /* create and initialize a linked list */ 4877 nlnk = N+1; 4878 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4879 4880 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4881 len = ai[owners[rank+1]] - ai[owners[rank]]; 4882 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4883 4884 current_space = free_space; 4885 4886 /* determine symbolic info for each local row */ 4887 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4888 4889 for (k=0; k<merge->nrecv; k++) { 4890 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved 
i-structure */ 4891 nrows = *buf_ri_k[k]; 4892 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4893 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4894 } 4895 4896 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4897 len = 0; 4898 for (i=0; i<m; i++) { 4899 bnzi = 0; 4900 /* add local non-zero cols of this proc's seqmat into lnk */ 4901 arow = owners[rank] + i; 4902 anzi = ai[arow+1] - ai[arow]; 4903 aj = a->j + ai[arow]; 4904 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4905 bnzi += nlnk; 4906 /* add received col data into lnk */ 4907 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4908 if (i == *nextrow[k]) { /* i-th row */ 4909 anzi = *(nextai[k]+1) - *nextai[k]; 4910 aj = buf_rj[k] + *nextai[k]; 4911 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4912 bnzi += nlnk; 4913 nextrow[k]++; nextai[k]++; 4914 } 4915 } 4916 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4917 4918 /* if free space is not available, make more free space */ 4919 if (current_space->local_remaining<bnzi) { 4920 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4921 nspacedouble++; 4922 } 4923 /* copy data into free space, then initialize lnk */ 4924 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4925 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4926 4927 current_space->array += bnzi; 4928 current_space->local_used += bnzi; 4929 current_space->local_remaining -= bnzi; 4930 4931 bi[i+1] = bi[i] + bnzi; 4932 } 4933 4934 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4935 4936 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4937 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4938 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4939 4940 /* create symbolic parallel matrix B_mpi */ 4941 
/*---------------------------------------*/ 4942 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4943 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4944 if (n==PETSC_DECIDE) { 4945 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4946 } else { 4947 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4948 } 4949 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4950 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4951 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4952 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4953 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4954 4955 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4956 B_mpi->assembled = PETSC_FALSE; 4957 merge->bi = bi; 4958 merge->bj = bj; 4959 merge->buf_ri = buf_ri; 4960 merge->buf_rj = buf_rj; 4961 merge->coi = NULL; 4962 merge->coj = NULL; 4963 merge->owners_co = NULL; 4964 4965 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4966 4967 /* attach the supporting struct to B_mpi for reuse */ 4968 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4969 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4970 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4971 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4972 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4973 *mpimat = B_mpi; 4974 4975 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4976 PetscFunctionReturn(0); 4977 } 4978 4979 /*@C 4980 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4981 matrices from each processor 4982 4983 Collective 4984 4985 Input Parameters: 4986 + comm - the communicators the parallel matrix will live on 4987 . seqmat - the input sequential matrices 4988 . 
m - number of local rows (or PETSC_DECIDE) 4989 . n - number of local columns (or PETSC_DECIDE) 4990 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4991 4992 Output Parameter: 4993 . mpimat - the parallel matrix generated 4994 4995 Level: advanced 4996 4997 Notes: 4998 The dimensions of the sequential matrix in each processor MUST be the same. 4999 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5000 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5001 @*/ 5002 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5003 { 5004 PetscErrorCode ierr; 5005 PetscMPIInt size; 5006 5007 PetscFunctionBegin; 5008 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5009 if (size == 1) { 5010 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5011 if (scall == MAT_INITIAL_MATRIX) { 5012 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5013 } else { 5014 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5015 } 5016 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5017 PetscFunctionReturn(0); 5018 } 5019 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5020 if (scall == MAT_INITIAL_MATRIX) { 5021 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5022 } 5023 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5024 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5025 PetscFunctionReturn(0); 5026 } 5027 5028 /*@ 5029 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5030 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5031 with MatGetSize() 5032 5033 Not Collective 5034 5035 Input Parameters: 5036 + A - the matrix 5037 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5038 5039 Output Parameter: 5040 . A_loc - the local sequential matrix generated 5041 5042 Level: developer 5043 5044 Notes: 5045 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5046 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5047 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5048 modify the values of the returned A_loc. 5049 5050 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5051 @*/ 5052 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5053 { 5054 PetscErrorCode ierr; 5055 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5056 Mat_SeqAIJ *mat,*a,*b; 5057 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5058 const PetscScalar *aa,*ba,*aav,*bav; 5059 PetscScalar *ca,*cam; 5060 PetscMPIInt size; 5061 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5062 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5063 PetscBool match; 5064 5065 PetscFunctionBegin; 5066 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5067 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5068 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5069 if (size == 1) { 5070 if (scall == MAT_INITIAL_MATRIX) { 5071 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5072 *A_loc = mpimat->A; 5073 } else if (scall == MAT_REUSE_MATRIX) { 5074 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5075 } 5076 
PetscFunctionReturn(0); 5077 } 5078 5079 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5080 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5081 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5082 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5083 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5084 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5085 aa = aav; 5086 ba = bav; 5087 if (scall == MAT_INITIAL_MATRIX) { 5088 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5089 ci[0] = 0; 5090 for (i=0; i<am; i++) { 5091 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5092 } 5093 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5094 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5095 k = 0; 5096 for (i=0; i<am; i++) { 5097 ncols_o = bi[i+1] - bi[i]; 5098 ncols_d = ai[i+1] - ai[i]; 5099 /* off-diagonal portion of A */ 5100 for (jo=0; jo<ncols_o; jo++) { 5101 col = cmap[*bj]; 5102 if (col >= cstart) break; 5103 cj[k] = col; bj++; 5104 ca[k++] = *ba++; 5105 } 5106 /* diagonal portion of A */ 5107 for (j=0; j<ncols_d; j++) { 5108 cj[k] = cstart + *aj++; 5109 ca[k++] = *aa++; 5110 } 5111 /* off-diagonal portion of A */ 5112 for (j=jo; j<ncols_o; j++) { 5113 cj[k] = cmap[*bj++]; 5114 ca[k++] = *ba++; 5115 } 5116 } 5117 /* put together the new matrix */ 5118 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5119 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5120 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5121 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5122 mat->free_a = PETSC_TRUE; 5123 mat->free_ij = PETSC_TRUE; 5124 mat->nonew = 0; 5125 } else if (scall == MAT_REUSE_MATRIX) { 5126 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5127 #if defined(PETSC_USE_DEVICE) 5128 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5129 #endif 5130 ci = mat->i; cj = mat->j; cam = mat->a; 5131 for (i=0; i<am; i++) { 5132 /* off-diagonal portion of A */ 5133 ncols_o = bi[i+1] - bi[i]; 5134 for (jo=0; jo<ncols_o; jo++) { 5135 col = cmap[*bj]; 5136 if (col >= cstart) break; 5137 *cam++ = *ba++; bj++; 5138 } 5139 /* diagonal portion of A */ 5140 ncols_d = ai[i+1] - ai[i]; 5141 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5142 /* off-diagonal portion of A */ 5143 for (j=jo; j<ncols_o; j++) { 5144 *cam++ = *ba++; bj++; 5145 } 5146 } 5147 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5148 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5149 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5150 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5151 PetscFunctionReturn(0); 5152 } 5153 5154 /*@ 5155 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5156 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5157 5158 Not Collective 5159 5160 Input Parameters: 5161 + A - the matrix 5162 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5163 5164 Output Parameters: 5165 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5166 - A_loc - the local sequential matrix generated 5167 5168 Level: developer 5169 5170 Notes: 5171 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5172 5173 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5174 5175 @*/ 5176 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5177 { 5178 PetscErrorCode ierr; 5179 Mat Ao,Ad; 5180 const PetscInt *cmap; 5181 PetscMPIInt size; 5182 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5183 5184 PetscFunctionBegin; 5185 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5186 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5187 if (size == 1) { 5188 if (scall == MAT_INITIAL_MATRIX) { 5189 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5190 *A_loc = Ad; 5191 } else if (scall == MAT_REUSE_MATRIX) { 5192 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5193 } 5194 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5195 PetscFunctionReturn(0); 5196 } 5197 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5198 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5199 if (f) { 5200 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5201 } else { 5202 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5203 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5204 Mat_SeqAIJ 
*c; 5205 PetscInt *ai = a->i, *aj = a->j; 5206 PetscInt *bi = b->i, *bj = b->j; 5207 PetscInt *ci,*cj; 5208 const PetscScalar *aa,*ba; 5209 PetscScalar *ca; 5210 PetscInt i,j,am,dn,on; 5211 5212 ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5213 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5214 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5215 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5216 if (scall == MAT_INITIAL_MATRIX) { 5217 PetscInt k; 5218 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5219 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5220 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5221 ci[0] = 0; 5222 for (i=0,k=0; i<am; i++) { 5223 const PetscInt ncols_o = bi[i+1] - bi[i]; 5224 const PetscInt ncols_d = ai[i+1] - ai[i]; 5225 ci[i+1] = ci[i] + ncols_o + ncols_d; 5226 /* diagonal portion of A */ 5227 for (j=0; j<ncols_d; j++,k++) { 5228 cj[k] = *aj++; 5229 ca[k] = *aa++; 5230 } 5231 /* off-diagonal portion of A */ 5232 for (j=0; j<ncols_o; j++,k++) { 5233 cj[k] = dn + *bj++; 5234 ca[k] = *ba++; 5235 } 5236 } 5237 /* put together the new matrix */ 5238 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5239 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5240 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5241 c = (Mat_SeqAIJ*)(*A_loc)->data; 5242 c->free_a = PETSC_TRUE; 5243 c->free_ij = PETSC_TRUE; 5244 c->nonew = 0; 5245 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5246 } else if (scall == MAT_REUSE_MATRIX) { 5247 #if defined(PETSC_HAVE_DEVICE) 5248 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5249 #endif 5250 c = (Mat_SeqAIJ*)(*A_loc)->data; 5251 ca = c->a; 5252 for (i=0; i<am; i++) { 5253 const PetscInt ncols_d = ai[i+1] - ai[i]; 5254 const PetscInt ncols_o = bi[i+1] - bi[i]; 5255 /* diagonal portion of A */ 5256 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5257 /* off-diagonal portion of A */ 5258 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5259 } 5260 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5261 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5262 ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr); 5263 if (glob) { 5264 PetscInt cst, *gidx; 5265 5266 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5267 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5268 for (i=0; i<dn; i++) gidx[i] = cst + i; 5269 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5270 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5271 } 5272 } 5273 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5274 PetscFunctionReturn(0); 5275 } 5276 5277 /*@C 5278 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5279 5280 Not Collective 5281 5282 Input Parameters: 5283 + A - the matrix 5284 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5285 - row, col - index sets of rows and columns to extract (or NULL) 5286 5287 Output Parameter: 5288 . 
 A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  /* default row selection: all locally owned rows */
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  /* default column selection: owned columns plus the nonzero off-process columns
     (garray), assembled in globally sorted order */
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    /* off-process columns smaller than the owned range (garray is sorted) */
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    /* the owned columns */
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    /* off-process columns larger than the owned range */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  /* MatCreateSubMatrices() expects an array of Mat on reuse */
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once a row index matches.
 * Rows may be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt            owner;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscErrorCode         ierr;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  /* per-row counts and offsets are stored interleaved: [i*2+0] = diag, [i*2+1] = off-diag */
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location for each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol    = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* Build entry-level SF graphs: one leaf per received nonzero.
   * diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data to save memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix; pd->j is restored below after the Bcast completes */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* We want P_oth to store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  /* temporarily rewrite po->j to global indices in place; converted back below */
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  nout = 0;
  /* undo the in-place globalization of po->j; every index must map back (nout check) */
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse them later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of P that correspond to nonzero off-diagonal columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ     *p_oth;
  Mat_SeqAIJ     *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  IS             rows,map;
  PetscHMapI     hamp;
  PetscInt       i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm       comm;
  PetscSF        sf,osf;
  PetscBool      has;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
    ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
    ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;   /* dof > 1 folds MAIJ block columns onto one P row */
      ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
      if (!has) {
        mapping[i] = count;
        ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
      } else {
        /* Current 'i' has the same key as the previous step */
        mapping[i] = count-1;
      }
    }
    ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
    ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
    if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
    ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
    off = 0;
    ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
    ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
    ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
    /* In case the matrix was already created but users want to recreate the matrix */
    ierr = MatDestroy(P_oth);CHKERRQ(ierr);
    ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
    ierr = ISDestroy(&map);CHKERRQ(ierr);
    ierr = ISDestroy(&rows);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
     * that were attached to the matrix earlier.
     * */
    ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
    ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
    if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A - the first matrix in mpiaij format
.    B - the second matrix in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Input/Output Parameters:
+    rowb - index sets of rows of B to extract (or NULL), modified on output
-    colb - index sets of columns of B to extract (or NULL), modified on output

   Output Parameter:
.
B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* columns of A must align with rows of B for the product to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the (sorted) global row list: off-diagonal columns below the local
       range, then the local diagonal columns, then off-diagonal columns above.
       garray is sorted, which makes the three-way split below valid. */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    /* idx ownership passes to isrowb (PETSC_OWN_POINTER) */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); /* all columns of B */
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatCreateSubMatrices() in reuse mode presumably expects a malloc'ed Mat array
       holding the previous submatrix -- TODO confirm against its contract */
    ierr   = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* Hand the index sets back to the caller for later reuse, or clean them up */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5687 5688 Level: developer 5689 5690 */ 5691 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5692 { 5693 PetscErrorCode ierr; 5694 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5695 Mat_SeqAIJ *b_oth; 5696 VecScatter ctx; 5697 MPI_Comm comm; 5698 const PetscMPIInt *rprocs,*sprocs; 5699 const PetscInt *srow,*rstarts,*sstarts; 5700 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5701 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5702 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5703 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5704 PetscMPIInt size,tag,rank,nreqs; 5705 5706 PetscFunctionBegin; 5707 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5708 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5709 5710 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5711 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5712 } 5713 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5714 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5715 5716 if (size == 1) { 5717 startsj_s = NULL; 5718 bufa_ptr = NULL; 5719 *B_oth = NULL; 5720 PetscFunctionReturn(0); 5721 } 5722 5723 ctx = a->Mvctx; 5724 tag = ((PetscObject)ctx)->tag; 5725 5726 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5727 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5728 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5729 ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr); 5730 ierr = 
PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr); 5731 rwaits = reqs; 5732 swaits = reqs + nrecvs; 5733 5734 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5735 if (scall == MAT_INITIAL_MATRIX) { 5736 /* i-array */ 5737 /*---------*/ 5738 /* post receives */ 5739 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5740 for (i=0; i<nrecvs; i++) { 5741 rowlen = rvalues + rstarts[i]*rbs; 5742 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5743 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5744 } 5745 5746 /* pack the outgoing message */ 5747 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5748 5749 sstartsj[0] = 0; 5750 rstartsj[0] = 0; 5751 len = 0; /* total length of j or a array to be sent */ 5752 if (nsends) { 5753 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5754 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5755 } 5756 for (i=0; i<nsends; i++) { 5757 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5758 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5759 for (j=0; j<nrows; j++) { 5760 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5761 for (l=0; l<sbs; l++) { 5762 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5763 5764 rowlen[j*sbs+l] = ncols; 5765 5766 len += ncols; 5767 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5768 } 5769 k++; 5770 } 5771 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5772 5773 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5774 } 5775 /* recvs and sends of i-array are completed */ 5776 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5777 ierr = PetscFree(svalues);CHKERRQ(ierr); 5778 5779 /* allocate buffers 
for sending j and a arrays */ 5780 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5781 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5782 5783 /* create i-array of B_oth */ 5784 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5785 5786 b_othi[0] = 0; 5787 len = 0; /* total length of j or a array to be received */ 5788 k = 0; 5789 for (i=0; i<nrecvs; i++) { 5790 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5791 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5792 for (j=0; j<nrows; j++) { 5793 b_othi[k+1] = b_othi[k] + rowlen[j]; 5794 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5795 k++; 5796 } 5797 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5798 } 5799 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5800 5801 /* allocate space for j and a arrrays of B_oth */ 5802 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5803 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5804 5805 /* j-array */ 5806 /*---------*/ 5807 /* post receives of j-array */ 5808 for (i=0; i<nrecvs; i++) { 5809 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5810 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5811 } 5812 5813 /* pack the outgoing message j-array */ 5814 if (nsends) k = sstarts[0]; 5815 for (i=0; i<nsends; i++) { 5816 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5817 bufJ = bufj+sstartsj[i]; 5818 for (j=0; j<nrows; j++) { 5819 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5820 for (ll=0; ll<sbs; ll++) { 5821 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5822 for (l=0; l<ncols; l++) { 5823 *bufJ++ = cols[l]; 5824 } 5825 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5826 } 5827 } 5828 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5829 } 5830 5831 /* recvs and sends of j-array are 
completed */ 5832 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5833 } else if (scall == MAT_REUSE_MATRIX) { 5834 sstartsj = *startsj_s; 5835 rstartsj = *startsj_r; 5836 bufa = *bufa_ptr; 5837 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5838 b_otha = b_oth->a; 5839 #if defined(PETSC_HAVE_DEVICE) 5840 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5841 #endif 5842 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5843 5844 /* a-array */ 5845 /*---------*/ 5846 /* post receives of a-array */ 5847 for (i=0; i<nrecvs; i++) { 5848 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5849 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5850 } 5851 5852 /* pack the outgoing message a-array */ 5853 if (nsends) k = sstarts[0]; 5854 for (i=0; i<nsends; i++) { 5855 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5856 bufA = bufa+sstartsj[i]; 5857 for (j=0; j<nrows; j++) { 5858 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5859 for (ll=0; ll<sbs; ll++) { 5860 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5861 for (l=0; l<ncols; l++) { 5862 *bufA++ = vals[l]; 5863 } 5864 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5865 } 5866 } 5867 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5868 } 5869 /* recvs and sends of a-array are completed */ 5870 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5871 ierr = PetscFree(reqs);CHKERRQ(ierr); 5872 5873 if (scall == MAT_INITIAL_MATRIX) { 5874 /* put together the new matrix */ 5875 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5876 5877 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 5878 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5879 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5880 b_oth->free_a = PETSC_TRUE; 5881 b_oth->free_ij = PETSC_TRUE; 5882 b_oth->nonew = 0; 5883 5884 ierr = PetscFree(bufj);CHKERRQ(ierr); 5885 if (!startsj_s || !bufa_ptr) { 5886 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5887 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5888 } else { 5889 *startsj_s = sstartsj; 5890 *startsj_r = rstartsj; 5891 *bufa_ptr = bufa; 5892 } 5893 } 5894 5895 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5896 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5897 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5898 PetscFunctionReturn(0); 5899 } 5900 5901 /*@C 5902 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5903 5904 Not Collective 5905 5906 Input Parameter: 5907 . A - The matrix in mpiaij format 5908 5909 Output Parameters: 5910 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5911 . 
colmap - A map from global column index to local index into lvec 5912 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5913 5914 Level: developer 5915 5916 @*/ 5917 #if defined(PETSC_USE_CTABLE) 5918 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5919 #else 5920 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5921 #endif 5922 { 5923 Mat_MPIAIJ *a; 5924 5925 PetscFunctionBegin; 5926 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5927 PetscValidPointer(lvec, 2); 5928 PetscValidPointer(colmap, 3); 5929 PetscValidPointer(multScatter, 4); 5930 a = (Mat_MPIAIJ*) A->data; 5931 if (lvec) *lvec = a->lvec; 5932 if (colmap) *colmap = a->colmap; 5933 if (multScatter) *multScatter = a->Mvctx; 5934 PetscFunctionReturn(0); 5935 } 5936 5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5940 #if defined(PETSC_HAVE_MKL_SPARSE) 5941 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5942 #endif 5943 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5944 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5945 #if defined(PETSC_HAVE_ELEMENTAL) 5946 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5947 #endif 5948 #if defined(PETSC_HAVE_SCALAPACK) 5949 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5950 #endif 5951 #if defined(PETSC_HAVE_HYPRE) 5952 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5953 #endif 5954 #if defined(PETSC_HAVE_CUDA) 5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5956 
#endif 5957 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5958 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5959 #endif 5960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5961 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5962 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5963 5964 /* 5965 Computes (B'*A')' since computing B*A directly is untenable 5966 5967 n p p 5968 [ ] [ ] [ ] 5969 m [ A ] * n [ B ] = m [ C ] 5970 [ ] [ ] [ ] 5971 5972 */ 5973 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5974 { 5975 PetscErrorCode ierr; 5976 Mat At,Bt,Ct; 5977 5978 PetscFunctionBegin; 5979 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5980 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5981 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5982 ierr = MatDestroy(&At);CHKERRQ(ierr); 5983 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5984 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5985 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5986 PetscFunctionReturn(0); 5987 } 5988 5989 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5990 { 5991 PetscErrorCode ierr; 5992 PetscBool cisdense; 5993 5994 PetscFunctionBegin; 5995 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5996 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5997 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5998 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5999 if (!cisdense) { 6000 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6001 } 6002 ierr = MatSetUp(C);CHKERRQ(ierr); 6003 6004 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6005 PetscFunctionReturn(0); 6006 } 
6007 6008 /* ----------------------------------------------------------------*/ 6009 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6010 { 6011 Mat_Product *product = C->product; 6012 Mat A = product->A,B=product->B; 6013 6014 PetscFunctionBegin; 6015 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6016 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6017 6018 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6019 C->ops->productsymbolic = MatProductSymbolic_AB; 6020 PetscFunctionReturn(0); 6021 } 6022 6023 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6024 { 6025 PetscErrorCode ierr; 6026 Mat_Product *product = C->product; 6027 6028 PetscFunctionBegin; 6029 if (product->type == MATPRODUCT_AB) { 6030 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6031 } 6032 PetscFunctionReturn(0); 6033 } 6034 /* ----------------------------------------------------------------*/ 6035 6036 /*MC 6037 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6038 6039 Options Database Keys: 6040 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6041 6042 Level: beginner 6043 6044 Notes: 6045 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6046 in this case the values associated with the rows and columns one passes in are set to zero 6047 in the matrix 6048 6049 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: MatCreateAIJ()
M*/

/* Type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ context, installs the
   function table, creates the off-process stash, and registers the composed
   query/conversion/product functions for this type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register composed functions; the string keys are the public lookup names
     used by PetscObjectQueryFunction() and MUST NOT be altered */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.
j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* no preallocation routine runs: the A/B blocks are created directly below */
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* Wrap the user's arrays directly (not copied; user keeps ownership, see Notes) */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  /* Suppress stash communication during assembly since all rows are local */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
6216 #undef CHKERRQ 6217 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6218 #undef SETERRQ2 6219 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6220 #undef SETERRQ3 6221 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6222 #undef SETERRQ 6223 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6224 6225 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6226 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6227 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6228 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6229 #else 6230 #endif 6231 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6232 { 6233 Mat mat = *mmat; 6234 PetscInt m = *mm, n = *mn; 6235 InsertMode addv = *maddv; 6236 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6237 PetscScalar value; 6238 PetscErrorCode ierr; 6239 6240 MatCheckPreallocated(mat,1); 6241 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6242 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6243 { 6244 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6245 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6246 PetscBool roworiented = aij->roworiented; 6247 6248 /* Some Variables required in the macro */ 6249 Mat A = aij->A; 6250 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6251 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6252 MatScalar *aa = a->a; 6253 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 6254 Mat B = aij->B; 6255 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6256 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6257 MatScalar *ba = b->a; 6258 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6259 * cannot use "#if defined" inside a macro. */ 6260 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6261 6262 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6263 PetscInt nonew = a->nonew; 6264 MatScalar *ap1,*ap2; 6265 6266 PetscFunctionBegin; 6267 for (i=0; i<m; i++) { 6268 if (im[i] < 0) continue; 6269 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6270 if (im[i] >= rstart && im[i] < rend) { 6271 row = im[i] - rstart; 6272 lastcol1 = -1; 6273 rp1 = aj + ai[row]; 6274 ap1 = aa + ai[row]; 6275 rmax1 = aimax[row]; 6276 nrow1 = ailen[row]; 6277 low1 = 0; 6278 high1 = nrow1; 6279 lastcol2 = -1; 6280 rp2 = bj + bi[row]; 6281 ap2 = ba + bi[row]; 6282 rmax2 = bimax[row]; 6283 nrow2 = bilen[row]; 6284 low2 = 0; 6285 high2 = nrow2; 6286 6287 for (j=0; j<n; j++) { 6288 if (roworiented) value = v[i*n+j]; 6289 else value = v[i+j*m]; 6290 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6291 if (in[j] >= cstart && in[j] < cend) { 6292 col = in[j] - cstart; 6293 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6294 #if defined(PETSC_HAVE_DEVICE) 6295 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6296 #endif 6297 } else if (in[j] < 0) continue; 6298 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6299 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6300 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6301 } else { 6302 if (mat->was_assembled) { 6303 if (!aij->colmap) { 6304 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6305 } 6306 #if defined(PETSC_USE_CTABLE) 6307 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6308 col--; 6309 #else 6310 col = aij->colmap[in[j]] - 1; 6311 #endif 6312 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6313 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6314 col = in[j]; 6315 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6316 B = aij->B; 6317 b = (Mat_SeqAIJ*)B->data; 6318 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6319 rp2 = bj + bi[row]; 6320 ap2 = ba + bi[row]; 6321 rmax2 = bimax[row]; 6322 nrow2 = bilen[row]; 6323 low2 = 0; 6324 high2 = nrow2; 6325 bm = aij->B->rmap->n; 6326 ba = b->a; 6327 inserted = PETSC_FALSE; 6328 } 6329 } else col = in[j]; 6330 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6331 #if defined(PETSC_HAVE_DEVICE) 6332 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6333 #endif 6334 } 6335 } 6336 } else if (!aij->donotstash) { 6337 if (roworiented) { 6338 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6339 } else { 6340 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6341 } 6342 } 6343 } 6344 } 6345 PetscFunctionReturnVoid(); 6346 } 6347 6348 typedef struct { 6349 Mat *mp; /* intermediate products */ 6350 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w;  /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own;          /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt     **off;          /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;     /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;             /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destroy callback for the product data attached to C by MatProductSymbolic_MPIAIJBACKEND():
   frees the SF-backed COO buffers, the intermediate product matrices mp[], and the
   shared off/own index arrays (off[0]/own[0] hold the single allocation the other
   entries point into). */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
  PetscInt            i;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
  ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
  /* coo_v/coo_w were allocated with PetscSFMalloc() using mtype, so free them the same way,
     and only destroy the SF afterwards */
  ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
  ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
  ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
  ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
  for (i = 0; i < mmdata->cp; i++) {
    ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
  ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); /* single shared index allocation */
  ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
  ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); /* single shared index allocation */
  ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
  ierr = PetscFree(mmdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Copy the (sub)set of A's nonzero values selected by idx[] into v[]; with idx == NULL
   the first n values are copied verbatim. Dispatches to a type-specific
   "MatSeqAIJCopySubArray_C" implementation when the Mat provides one. */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
  } else {
    const PetscScalar *vv;

    ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
    if (n && idx) {
      /* gather the values at the given positions */
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt       j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
    }
    ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Numeric phase for the backend product: refresh the temporary matrices (unless
   reusesym says the symbolic phase just computed them), run the numeric phase of each
   intermediate product, then gather all values into coo_v and insert via MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    }
    if (mmdata->Bloc) {
      ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
    }
  }
  /* reusesym only skips the very first numeric call after an api_user symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  for (i
= 0; i < mmdata->cp; i++) {
    if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
  }
  /* copy the values of each (non-temporary) intermediate product into the COO buffers:
     coo_w collects values destined for other processes, coo_v the locally owned ones */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy all nz values contiguously */
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* exchange off-process values; they land after the n_d locally computed ones in coo_v */
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
  }
  ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase: decompose the requested MPI product into up to MAX_NUMBER_INTERMEDIATE
   sequential products on the local diagonal/off-diagonal blocks, run their symbolic
   phases, then set up the COO (i,j) pattern (including the PetscSF for entries that
   must be sent to other processes) and preallocate C with MatSetPreallocationCOO(). */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a,*p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob = NULL;
  const char             *prefix;
  char                   pprefix[256];
  const PetscInt         *globidx,*P_oth_idx;
  PetscInt               i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType         ptype;
  PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
  PetscMPIInt            size;
  PetscErrorCode         ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ptype = product->type;
  if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
    /* A symmetric: A^t B == A B, which avoids the off-process scatter of AtB */
    ptype = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* pick operands and the (local/global) sizes of C for each product type */
  switch (ptype) {
  case MATPRODUCT_AB:
    A = product->A;
    P = product->B;
    m = A->rmap->n;
    n = P->cmap->n;
    M = A->rmap->N;
    N = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P = product->A;
    A = product->B;
    m = P->cmap->n;
    n = A->cmap->n;
    M = P->cmap->N;
    N = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A = product->A;
    P = product->B;
    m = P->cmap->n;
    n = P->cmap->n;
    M = P->cmap->N;
    N = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: every row is local */

  /* defaults */
  for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  ierr = PetscNew(&mmdata);CHKERRQ(ierr);
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
  }
  a = (Mat_MPIAIJ*)A->data;
  p = (Mat_MPIAIJ*)P->data;
  ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
  ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);

  /* build the list of intermediate (sequential) products mp[0..cp-1] */
  cp   = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
      ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
      ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
      ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
      ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
      ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    /* P is product->B */
    ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
    ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
    ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
    ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
    ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
    ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
    ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
    mp[cp]->product->api_user = product->api_user;
    ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
    if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
    ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
    ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
      ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
      ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
      /* mp[1] = A_off * P_oth, an intermediate (temporary) result */
      ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      mptmp[cp] = PETSC_TRUE; /* consumed by the next product only; values never copied out */
      cp++;
      /* mp[2] = P_loc^t * mp[1] */
      ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);

  ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
  //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt mr    = mp[cp]->rmap->n;
      const PetscInt rs    = C->rmap->rstart;
      const PetscInt re    = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
  */
  ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
  ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2,*coo_i2,*coo_j2;

    ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
    ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
    ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ*)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt mr    = mp[cp]->rmap->n;
        const PetscInt rs    = C->rmap->rstart;
        const PetscInt re    = C->rmap->rend;
        const PetscInt cs    = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt gr  = rmap[i];
          const PetscInt nz  = ii[i+1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i+1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
          }
        }
      }
      /* csr-like: record where this product's segment ends */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
    ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
    ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);

    /* empty SF so that MatProductNumeric/MatDestroy can use it unconditionally */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ*)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt mr    = mp[cp]->rmap->n;
    const PetscInt rs    = C->rmap->rstart;
    const PetscInt re    = C->rmap->rend;
    const PetscInt cs    = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt gr  = rmap[i];
        const PetscInt nz  = ii[i+1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) {
    ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&glob);CHKERRQ(ierr);
  if (P_oth_l2g) {
    ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
  }
  ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);

  /* preallocate with COO data */
  ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
  ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product    *product = mat->product;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
  }
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU
code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7014 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7015 } else { 7016 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7017 ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7018 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7019 } 7020 break; 7021 case MATPRODUCT_AtB: 7022 if (product->api_user) { 7023 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 7024 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7025 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7026 } else { 7027 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 7028 ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7029 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7030 } 7031 break; 7032 case MATPRODUCT_PtAP: 7033 if (product->api_user) { 7034 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7035 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7036 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7037 } else { 7038 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7039 ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7040 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7041 } 7042 break; 7043 default: 7044 break; 7045 } 7046 match = (PetscBool)!usecpu; 7047 } 7048 #endif 7049 if (match) { 7050 switch (product->type) { 7051 case MATPRODUCT_AB: 7052 case MATPRODUCT_AtB: 7053 
case MATPRODUCT_PtAP: 7054 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7055 break; 7056 default: 7057 break; 7058 } 7059 } 7060 /* fallback to MPIAIJ ops */ 7061 if (!mat->ops->productsymbolic) { 7062 ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7063 } 7064 PetscFunctionReturn(0); 7065 } 7066