#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
    enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 66 { 67 PetscErrorCode ierr; 68 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 69 70 PetscFunctionBegin; 71 if (mat->A) { 72 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 73 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 74 } 75 PetscFunctionReturn(0); 76 } 77 78 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 79 { 80 PetscErrorCode ierr; 81 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 82 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 83 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 84 const PetscInt *ia,*ib; 85 const MatScalar *aa,*bb,*aav,*bav; 86 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 87 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 88 89 PetscFunctionBegin; 90 *keptrows = NULL; 91 92 ia = a->i; 93 ib = b->i; 94 ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr); 95 ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr); 96 for (i=0; i<m; i++) { 97 na = ia[i+1] - ia[i]; 98 nb = ib[i+1] - ib[i]; 99 if (!na && !nb) { 100 cnt++; 101 goto ok1; 102 } 103 aa = aav + ia[i]; 104 for (j=0; j<na; j++) { 105 if (aa[j] != 0.0) goto ok1; 106 } 107 bb = bav + ib[i]; 108 for (j=0; j <nb; j++) { 109 if (bb[j] != 0.0) goto ok1; 110 } 111 cnt++; 112 ok1:; 113 } 114 ierr = 
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr); 115 if (!n0rows) { 116 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 117 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 121 cnt = 0; 122 for (i=0; i<m; i++) { 123 na = ia[i+1] - ia[i]; 124 nb = ib[i+1] - ib[i]; 125 if (!na && !nb) continue; 126 aa = aav + ia[i]; 127 for (j=0; j<na;j++) { 128 if (aa[j] != 0.0) { 129 rows[cnt++] = rstart + i; 130 goto ok2; 131 } 132 } 133 bb = bav + ib[i]; 134 for (j=0; j<nb; j++) { 135 if (bb[j] != 0.0) { 136 rows[cnt++] = rstart + i; 137 goto ok2; 138 } 139 } 140 ok2:; 141 } 142 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 143 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 144 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 149 { 150 PetscErrorCode ierr; 151 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 152 PetscBool cong; 153 154 PetscFunctionBegin; 155 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 156 if (Y->assembled && cong) { 157 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 158 } else { 159 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 160 } 161 PetscFunctionReturn(0); 162 } 163 164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 165 { 166 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 167 PetscErrorCode ierr; 168 PetscInt i,rstart,nrows,*rows; 169 170 PetscFunctionBegin; 171 *zrows = NULL; 172 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 173 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 174 for (i=0; i<nrows; i++) rows[i] += rstart; 175 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 176 
PetscFunctionReturn(0); 177 } 178 179 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 180 { 181 PetscErrorCode ierr; 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 183 PetscInt i,m,n,*garray = aij->garray; 184 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 185 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 186 PetscReal *work; 187 const PetscScalar *dummy; 188 189 PetscFunctionBegin; 190 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 191 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 192 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 193 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 194 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 195 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 196 if (type == NORM_2) { 197 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 198 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 199 } 200 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 201 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 202 } 203 } else if (type == NORM_1) { 204 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 205 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 206 } 207 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 208 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 209 } 210 } else if (type == NORM_INFINITY) { 211 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 212 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 213 } 214 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 215 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 216 } 217 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 218 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 219 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 220 } 221 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 222 
work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 223 } 224 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 225 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 226 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 227 } 228 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 229 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 230 } 231 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 232 if (type == NORM_INFINITY) { 233 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 234 } else { 235 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 236 } 237 ierr = PetscFree(work);CHKERRQ(ierr); 238 if (type == NORM_2) { 239 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 240 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 241 for (i=0; i<n; i++) reductions[i] /= m; 242 } 243 PetscFunctionReturn(0); 244 } 245 246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 247 { 248 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 249 IS sis,gis; 250 PetscErrorCode ierr; 251 const PetscInt *isis,*igis; 252 PetscInt n,*iis,nsis,ngis,rstart,i; 253 254 PetscFunctionBegin; 255 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 256 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 257 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 258 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 259 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 260 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 261 262 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 263 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 264 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 265 n = ngis + nsis; 266 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 267 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 268 
for (i=0; i<n; i++) iis[i] += rstart; 269 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 270 271 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 272 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 273 ierr = ISDestroy(&sis);CHKERRQ(ierr); 274 ierr = ISDestroy(&gis);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 278 /* 279 Local utility routine that creates a mapping from the global column 280 number to the local number in the off-diagonal part of the local 281 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 282 a slightly higher hash table cost; without it it is not scalable (each processor 283 has an order N integer array but is fast to access. 284 */ 285 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt n = aij->B->cmap->n,i; 290 291 PetscFunctionBegin; 292 if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 293 #if defined(PETSC_USE_CTABLE) 294 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 295 for (i=0; i<n; i++) { 296 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 297 } 298 #else 299 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 300 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 301 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 302 #endif 303 PetscFunctionReturn(0); 304 } 305 306 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 307 { \ 308 if (col <= lastcol1) low1 = 0; \ 309 else high1 = nrow1; \ 310 lastcol1 = col;\ 311 while (high1-low1 > 5) { \ 312 t = (low1+high1)/2; \ 313 if (rp1[t] > col) high1 = t; \ 314 else low1 = t; \ 315 } \ 316 for (_i=low1; _i<high1; _i++) { \ 317 if (rp1[_i] > col) break; \ 318 if (rp1[_i] == col) { \ 319 if (addv == ADD_VALUES) { \ 
320 ap1[_i] += value; \ 321 /* Not sure LogFlops will slow dow the code or not */ \ 322 (void)PetscLogFlops(1.0); \ 323 } \ 324 else ap1[_i] = value; \ 325 inserted = PETSC_TRUE; \ 326 goto a_noinsert; \ 327 } \ 328 } \ 329 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 330 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 331 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 332 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 333 N = nrow1++ - 1; a->nz++; high1++; \ 334 /* shift up all the later entries in this row */ \ 335 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 336 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 337 rp1[_i] = col; \ 338 ap1[_i] = value; \ 339 A->nonzerostate++;\ 340 a_noinsert: ; \ 341 ailen[row] = nrow1; \ 342 } 343 344 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 345 { \ 346 if (col <= lastcol2) low2 = 0; \ 347 else high2 = nrow2; \ 348 lastcol2 = col; \ 349 while (high2-low2 > 5) { \ 350 t = (low2+high2)/2; \ 351 if (rp2[t] > col) high2 = t; \ 352 else low2 = t; \ 353 } \ 354 for (_i=low2; _i<high2; _i++) { \ 355 if (rp2[_i] > col) break; \ 356 if (rp2[_i] == col) { \ 357 if (addv == ADD_VALUES) { \ 358 ap2[_i] += value; \ 359 (void)PetscLogFlops(1.0); \ 360 } \ 361 else ap2[_i] = value; \ 362 inserted = PETSC_TRUE; \ 363 goto b_noinsert; \ 364 } \ 365 } \ 366 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 367 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 368 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 369 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 370 N = nrow2++ - 1; 
b->nz++; high2++; \ 371 /* shift up all the later entries in this row */ \ 372 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 373 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 374 rp2[_i] = col; \ 375 ap2[_i] = value; \ 376 B->nonzerostate++; \ 377 b_noinsert: ; \ 378 bilen[row] = nrow2; \ 379 } 380 381 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 382 { 383 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 384 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 385 PetscErrorCode ierr; 386 PetscInt l,*garray = mat->garray,diag; 387 388 PetscFunctionBegin; 389 /* code only works for square matrices A */ 390 391 /* find size of row to the left of the diagonal part */ 392 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 393 row = row - diag; 394 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 395 if (garray[b->j[b->i[row]+l]] > diag) break; 396 } 397 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 398 399 /* diagonal part */ 400 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 401 402 /* right of diagonal part */ 403 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 404 #if defined(PETSC_HAVE_DEVICE) 405 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 406 #endif 407 PetscFunctionReturn(0); 408 } 409 410 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscScalar value = 0.0; 414 PetscErrorCode ierr; 415 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 416 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 417 PetscBool roworiented = aij->roworiented; 418 419 /* Some Variables required in the macro */ 420 Mat A = aij->A; 
421 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 422 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 423 PetscBool ignorezeroentries = a->ignorezeroentries; 424 Mat B = aij->B; 425 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 426 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 427 MatScalar *aa,*ba; 428 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 429 * cannot use "#if defined" inside a macro. */ 430 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 431 432 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 433 PetscInt nonew; 434 MatScalar *ap1,*ap2; 435 436 PetscFunctionBegin; 437 #if defined(PETSC_HAVE_DEVICE) 438 if (A->offloadmask == PETSC_OFFLOAD_GPU) { 439 const PetscScalar *dummy; 440 ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr); 441 ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr); 442 } 443 if (B->offloadmask == PETSC_OFFLOAD_GPU) { 444 const PetscScalar *dummy; 445 ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr); 446 ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr); 447 } 448 #endif 449 aa = a->a; 450 ba = b->a; 451 for (i=0; i<m; i++) { 452 if (im[i] < 0) continue; 453 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 454 if (im[i] >= rstart && im[i] < rend) { 455 row = im[i] - rstart; 456 lastcol1 = -1; 457 rp1 = aj + ai[row]; 458 ap1 = aa + ai[row]; 459 rmax1 = aimax[row]; 460 nrow1 = ailen[row]; 461 low1 = 0; 462 high1 = nrow1; 463 lastcol2 = -1; 464 rp2 = bj + bi[row]; 465 ap2 = ba + bi[row]; 466 rmax2 = bimax[row]; 467 nrow2 = bilen[row]; 468 low2 = 0; 469 high2 = nrow2; 470 471 for (j=0; j<n; j++) { 472 if (v) value = roworiented ? 
v[i*n+j] : v[i+j*m]; 473 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 474 if (in[j] >= cstart && in[j] < cend) { 475 col = in[j] - cstart; 476 nonew = a->nonew; 477 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 478 #if defined(PETSC_HAVE_DEVICE) 479 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 480 #endif 481 } else if (in[j] < 0) continue; 482 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 483 else { 484 if (mat->was_assembled) { 485 if (!aij->colmap) { 486 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 487 } 488 #if defined(PETSC_USE_CTABLE) 489 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 490 col--; 491 #else 492 col = aij->colmap[in[j]] - 1; 493 #endif 494 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 495 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 496 col = in[j]; 497 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 498 B = aij->B; 499 b = (Mat_SeqAIJ*)B->data; 500 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 501 rp2 = bj + bi[row]; 502 ap2 = ba + bi[row]; 503 rmax2 = bimax[row]; 504 nrow2 = bilen[row]; 505 low2 = 0; 506 high2 = nrow2; 507 bm = aij->B->rmap->n; 508 ba = b->a; 509 inserted = PETSC_FALSE; 510 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 511 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 512 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 513 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 514 } 515 } else col = in[j]; 516 nonew = b->nonew; 517 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 518 #if 
defined(PETSC_HAVE_DEVICE) 519 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 520 #endif 521 } 522 } 523 } else { 524 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 525 if (!aij->donotstash) { 526 mat->assembled = PETSC_FALSE; 527 if (roworiented) { 528 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 529 } else { 530 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 531 } 532 } 533 } 534 } 535 PetscFunctionReturn(0); 536 } 537 538 /* 539 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 540 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 541 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
542 */ 543 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 544 { 545 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 546 Mat A = aij->A; /* diagonal part of the matrix */ 547 Mat B = aij->B; /* offdiagonal part of the matrix */ 548 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 549 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 550 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 551 PetscInt *ailen = a->ilen,*aj = a->j; 552 PetscInt *bilen = b->ilen,*bj = b->j; 553 PetscInt am = aij->A->rmap->n,j; 554 PetscInt diag_so_far = 0,dnz; 555 PetscInt offd_so_far = 0,onz; 556 557 PetscFunctionBegin; 558 /* Iterate over all rows of the matrix */ 559 for (j=0; j<am; j++) { 560 dnz = onz = 0; 561 /* Iterate over all non-zero columns of the current row */ 562 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 563 /* If column is in the diagonal */ 564 if (mat_j[col] >= cstart && mat_j[col] < cend) { 565 aj[diag_so_far++] = mat_j[col] - cstart; 566 dnz++; 567 } else { /* off-diagonal entries */ 568 bj[offd_so_far++] = mat_j[col]; 569 onz++; 570 } 571 } 572 ailen[j] = dnz; 573 bilen[j] = onz; 574 } 575 PetscFunctionReturn(0); 576 } 577 578 /* 579 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 580 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 581 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 582 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 583 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
584 */ 585 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 586 { 587 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 588 Mat A = aij->A; /* diagonal part of the matrix */ 589 Mat B = aij->B; /* offdiagonal part of the matrix */ 590 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 591 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 592 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 593 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 594 PetscInt *ailen = a->ilen,*aj = a->j; 595 PetscInt *bilen = b->ilen,*bj = b->j; 596 PetscInt am = aij->A->rmap->n,j; 597 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 598 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 599 PetscScalar *aa = a->a,*ba = b->a; 600 601 PetscFunctionBegin; 602 /* Iterate over all rows of the matrix */ 603 for (j=0; j<am; j++) { 604 dnz_row = onz_row = 0; 605 rowstart_offd = full_offd_i[j]; 606 rowstart_diag = full_diag_i[j]; 607 /* Iterate over all non-zero columns of the current row */ 608 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 609 /* If column is in the diagonal */ 610 if (mat_j[col] >= cstart && mat_j[col] < cend) { 611 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 612 aa[rowstart_diag+dnz_row] = mat_a[col]; 613 dnz_row++; 614 } else { /* off-diagonal entries */ 615 bj[rowstart_offd+onz_row] = mat_j[col]; 616 ba[rowstart_offd+onz_row] = mat_a[col]; 617 onz_row++; 618 } 619 } 620 ailen[j] = dnz_row; 621 bilen[j] = onz_row; 622 } 623 PetscFunctionReturn(0); 624 } 625 626 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 627 { 628 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 629 PetscErrorCode ierr; 630 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 631 PetscInt cstart = mat->cmap->rstart,cend = 
mat->cmap->rend,row,col; 632 633 PetscFunctionBegin; 634 for (i=0; i<m; i++) { 635 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 636 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 637 if (idxm[i] >= rstart && idxm[i] < rend) { 638 row = idxm[i] - rstart; 639 for (j=0; j<n; j++) { 640 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 641 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 642 if (idxn[j] >= cstart && idxn[j] < cend) { 643 col = idxn[j] - cstart; 644 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 645 } else { 646 if (!aij->colmap) { 647 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 648 } 649 #if defined(PETSC_USE_CTABLE) 650 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 651 col--; 652 #else 653 col = aij->colmap[idxn[j]] - 1; 654 #endif 655 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 656 else { 657 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 658 } 659 } 660 } 661 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 662 } 663 PetscFunctionReturn(0); 664 } 665 666 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 667 { 668 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 669 PetscErrorCode ierr; 670 PetscInt nstash,reallocs; 671 672 PetscFunctionBegin; 673 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 674 675 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 676 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 677 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 678 
PetscFunctionReturn(0); 679 } 680 681 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 682 { 683 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 684 PetscErrorCode ierr; 685 PetscMPIInt n; 686 PetscInt i,j,rstart,ncols,flg; 687 PetscInt *row,*col; 688 PetscBool other_disassembled; 689 PetscScalar *val; 690 691 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 692 693 PetscFunctionBegin; 694 if (!aij->donotstash && !mat->nooffprocentries) { 695 while (1) { 696 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 697 if (!flg) break; 698 699 for (i=0; i<n;) { 700 /* Now identify the consecutive vals belonging to the same row */ 701 for (j=i,rstart=row[j]; j<n; j++) { 702 if (row[j] != rstart) break; 703 } 704 if (j < n) ncols = j-i; 705 else ncols = n-i; 706 /* Now assemble all these values with a single function call */ 707 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 708 i = j; 709 } 710 } 711 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 712 } 713 #if defined(PETSC_HAVE_DEVICE) 714 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 715 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 716 if (mat->boundtocpu) { 717 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 718 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 719 } 720 #endif 721 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 722 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 723 724 /* determine if any processor has disassembled, if so we must 725 also disassemble ourself, in order that we may reassemble. 
*/ 726 /* 727 if nonzero structure of submatrix B cannot change then we know that 728 no processor disassembled thus we can skip this stuff 729 */ 730 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 731 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 732 if (mat->was_assembled && !other_disassembled) { 733 #if defined(PETSC_HAVE_DEVICE) 734 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 735 #endif 736 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 737 } 738 } 739 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 740 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 741 } 742 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 743 #if defined(PETSC_HAVE_DEVICE) 744 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 745 #endif 746 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 747 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 748 749 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 750 751 aij->rowvalues = NULL; 752 753 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 754 755 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 756 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 757 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 758 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 759 } 760 #if defined(PETSC_HAVE_DEVICE) 761 mat->offloadmask = PETSC_OFFLOAD_BOTH; 762 #endif 763 PetscFunctionReturn(0); 764 } 765 766 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 767 { 768 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 769 PetscErrorCode ierr; 770 771 PetscFunctionBegin; 772 ierr = 
MatZeroEntries(l->A);CHKERRQ(ierr); 773 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 774 PetscFunctionReturn(0); 775 } 776 777 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 778 { 779 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 780 PetscObjectState sA, sB; 781 PetscInt *lrows; 782 PetscInt r, len; 783 PetscBool cong, lch, gch; 784 PetscErrorCode ierr; 785 786 PetscFunctionBegin; 787 /* get locally owned rows */ 788 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 789 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 790 /* fix right hand side if needed */ 791 if (x && b) { 792 const PetscScalar *xx; 793 PetscScalar *bb; 794 795 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 796 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 797 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 798 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 799 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 800 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 801 } 802 803 sA = mat->A->nonzerostate; 804 sB = mat->B->nonzerostate; 805 806 if (diag != 0.0 && cong) { 807 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 808 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 810 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 811 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 812 PetscInt nnwA, nnwB; 813 PetscBool nnzA, nnzB; 814 815 nnwA = aijA->nonew; 816 nnwB = aijB->nonew; 817 nnzA = aijA->keepnonzeropattern; 818 nnzB = aijB->keepnonzeropattern; 819 if (!nnzA) { 820 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 821 aijA->nonew = 0; 822 } 823 if (!nnzB) { 824 ierr = 
PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 825 aijB->nonew = 0; 826 } 827 /* Must zero here before the next loop */ 828 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 829 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 830 for (r = 0; r < len; ++r) { 831 const PetscInt row = lrows[r] + A->rmap->rstart; 832 if (row >= A->cmap->N) continue; 833 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 834 } 835 aijA->nonew = nnwA; 836 aijB->nonew = nnwB; 837 } else { 838 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 839 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 840 } 841 ierr = PetscFree(lrows);CHKERRQ(ierr); 842 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 843 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 844 845 /* reduce nonzerostate */ 846 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 847 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 848 if (gch) A->nonzerostate++; 849 PetscFunctionReturn(0); 850 } 851 852 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 853 { 854 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 855 PetscErrorCode ierr; 856 PetscMPIInt n = A->rmap->n; 857 PetscInt i,j,r,m,len = 0; 858 PetscInt *lrows,*owners = A->rmap->range; 859 PetscMPIInt p = 0; 860 PetscSFNode *rrows; 861 PetscSF sf; 862 const PetscScalar *xx; 863 PetscScalar *bb,*mask; 864 Vec xmask,lmask; 865 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 866 const PetscInt *aj, *ii,*ridx; 867 PetscScalar *aa; 868 869 PetscFunctionBegin; 870 /* Create SF where leaves are input rows and roots are owned rows */ 871 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 872 for (r = 0; r < n; ++r) lrows[r] = -1; 873 
ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 874 for (r = 0; r < N; ++r) { 875 const PetscInt idx = rows[r]; 876 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 877 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 878 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 879 } 880 rrows[r].rank = p; 881 rrows[r].index = rows[r] - owners[p]; 882 } 883 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 884 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 885 /* Collect flags for rows to be zeroed */ 886 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 887 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 888 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 889 /* Compress and put in row numbers */ 890 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 891 /* zero diagonal part of matrix */ 892 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 893 /* handle off diagonal part of matrix */ 894 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 895 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 896 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 897 for (i=0; i<len; i++) bb[lrows[i]] = 1; 898 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 899 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 902 if (x && b) { /* this code is buggy when the row and column layout don't match */ 903 PetscBool cong; 904 905 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 906 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 907 ierr = 
VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 910 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 911 } 912 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 913 /* remove zeroed rows of off diagonal matrix */ 914 ii = aij->i; 915 for (i=0; i<len; i++) { 916 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 917 } 918 /* loop over all elements of off process part of matrix zeroing removed columns*/ 919 if (aij->compressedrow.use) { 920 m = aij->compressedrow.nrows; 921 ii = aij->compressedrow.i; 922 ridx = aij->compressedrow.rindex; 923 for (i=0; i<m; i++) { 924 n = ii[i+1] - ii[i]; 925 aj = aij->j + ii[i]; 926 aa = aij->a + ii[i]; 927 928 for (j=0; j<n; j++) { 929 if (PetscAbsScalar(mask[*aj])) { 930 if (b) bb[*ridx] -= *aa*xx[*aj]; 931 *aa = 0.0; 932 } 933 aa++; 934 aj++; 935 } 936 ridx++; 937 } 938 } else { /* do not use compressed row format */ 939 m = l->B->rmap->n; 940 for (i=0; i<m; i++) { 941 n = ii[i+1] - ii[i]; 942 aj = aij->j + ii[i]; 943 aa = aij->a + ii[i]; 944 for (j=0; j<n; j++) { 945 if (PetscAbsScalar(mask[*aj])) { 946 if (b) bb[i] -= *aa*xx[*aj]; 947 *aa = 0.0; 948 } 949 aa++; 950 aj++; 951 } 952 } 953 } 954 if (x && b) { 955 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 956 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 957 } 958 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 959 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 960 ierr = PetscFree(lrows);CHKERRQ(ierr); 961 962 /* only change matrix nonzero state if pattern was allowed to be changed */ 963 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 964 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 965 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 966 } 967 PetscFunctionReturn(0); 968 } 
969 970 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 971 { 972 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 973 PetscErrorCode ierr; 974 PetscInt nt; 975 VecScatter Mvctx = a->Mvctx; 976 977 PetscFunctionBegin; 978 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 979 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 980 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 982 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 988 { 989 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 990 PetscErrorCode ierr; 991 992 PetscFunctionBegin; 993 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 997 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 998 { 999 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1000 PetscErrorCode ierr; 1001 VecScatter Mvctx = a->Mvctx; 1002 1003 PetscFunctionBegin; 1004 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1012 { 1013 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1014 PetscErrorCode ierr; 1015 1016 PetscFunctionBegin; 1017 /* do nondiagonal part */ 1018 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1019 /* do local part */ 1020 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1021 /* add partial results together */ 1022 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1023 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1028 { 1029 MPI_Comm comm; 1030 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1031 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1032 IS Me,Notme; 1033 PetscErrorCode ierr; 1034 PetscInt M,N,first,last,*notme,i; 1035 PetscBool lf; 1036 PetscMPIInt size; 1037 1038 PetscFunctionBegin; 1039 /* Easy test: symmetric diagonal block */ 1040 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1041 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1042 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr); 1043 if (!*f) PetscFunctionReturn(0); 1044 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1045 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1046 if (size == 1) PetscFunctionReturn(0); 1047 1048 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1049 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1050 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1051 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1052 for (i=0; i<first; i++) notme[i] = i; 1053 for (i=last; i<M; i++) notme[i-last+first] = i; 1054 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1055 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1056 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1057 Aoff = Aoffs[0]; 1058 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1059 Boff = Boffs[0]; 1060 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1061 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1062 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1063 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1064 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1065 ierr = PetscFree(notme);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1070 { 1071 PetscErrorCode ierr; 1072 1073 PetscFunctionBegin; 1074 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } 1077 1078 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1079 { 1080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1081 PetscErrorCode ierr; 1082 1083 PetscFunctionBegin; 1084 /* do nondiagonal part */ 1085 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1086 /* do local part */ 1087 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1088 /* add partial results together */ 1089 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1090 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 /* 1095 This only works correctly for square matrices where the subblock A->A is 
the 1096 diagonal block 1097 */ 1098 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1099 { 1100 PetscErrorCode ierr; 1101 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1102 1103 PetscFunctionBegin; 1104 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1105 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1106 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1117 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1122 { 1123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 #if defined(PETSC_USE_LOG) 1128 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1129 #endif 1130 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1131 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1132 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1133 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1134 #if defined(PETSC_USE_CTABLE) 1135 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1136 #else 1137 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1138 #endif 1139 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1140 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1141 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1142 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1143 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1144 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1145 1146 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1147 ierr = 
PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1148 1149 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1154 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1156 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1157 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1158 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1159 #if defined(PETSC_HAVE_CUDA) 1160 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1161 #endif 1162 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1164 #endif 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 1166 #if defined(PETSC_HAVE_ELEMENTAL) 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1168 #endif 1169 #if defined(PETSC_HAVE_SCALAPACK) 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1171 #endif 1172 #if defined(PETSC_HAVE_HYPRE) 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1174 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1175 #endif 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1179 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1181 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1182 #if defined(PETSC_HAVE_MKL_SPARSE) 1183 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1184 #endif 1185 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1186 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1187 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1192 { 1193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1194 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1195 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1196 const PetscInt *garray = aij->garray; 1197 const PetscScalar *aa,*ba; 1198 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1199 PetscInt *rowlens; 1200 PetscInt *colidxs; 1201 PetscScalar *matvals; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1206 1207 M = mat->rmap->N; 1208 N = mat->cmap->N; 1209 m = mat->rmap->n; 1210 rs = mat->rmap->rstart; 1211 cs = mat->cmap->rstart; 1212 nz = 
A->nz + B->nz; 1213 1214 /* write matrix header */ 1215 header[0] = MAT_FILE_CLASSID; 1216 header[1] = M; header[2] = N; header[3] = nz; 1217 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1218 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1219 1220 /* fill in and store row lengths */ 1221 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1222 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1223 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1224 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1225 1226 /* fill in and store column indices */ 1227 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1228 for (cnt=0, i=0; i<m; i++) { 1229 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1230 if (garray[B->j[jb]] > cs) break; 1231 colidxs[cnt++] = garray[B->j[jb]]; 1232 } 1233 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1234 colidxs[cnt++] = A->j[ja] + cs; 1235 for (; jb<B->i[i+1]; jb++) 1236 colidxs[cnt++] = garray[B->j[jb]]; 1237 } 1238 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1239 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1240 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1241 1242 /* fill in and store nonzero values */ 1243 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1244 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1245 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1246 for (cnt=0, i=0; i<m; i++) { 1247 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1248 if (garray[B->j[jb]] > cs) break; 1249 matvals[cnt++] = ba[jb]; 1250 } 1251 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1252 matvals[cnt++] = aa[ja]; 1253 for (; jb<B->i[i+1]; jb++) 1254 matvals[cnt++] = ba[jb]; 1255 } 1256 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1257 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1258 if (cnt != nz) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1259 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1260 ierr = PetscFree(matvals);CHKERRQ(ierr); 1261 1262 /* write block size option to the viewer's .info file */ 1263 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #include <petscdraw.h> 1268 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1269 { 1270 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1271 PetscErrorCode ierr; 1272 PetscMPIInt rank = aij->rank,size = aij->size; 1273 PetscBool isdraw,iascii,isbinary; 1274 PetscViewer sviewer; 1275 PetscViewerFormat format; 1276 1277 PetscFunctionBegin; 1278 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1279 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1280 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1281 if (iascii) { 1282 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1283 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1284 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1285 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1286 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1287 for (i=0; i<(PetscInt)size; i++) { 1288 nmax = PetscMax(nmax,nz[i]); 1289 nmin = PetscMin(nmin,nz[i]); 1290 navg += nz[i]; 1291 } 1292 ierr = PetscFree(nz);CHKERRQ(ierr); 1293 navg = navg/size; 1294 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1298 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1299 
MatInfo info; 1300 PetscInt *inodes=NULL; 1301 1302 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1303 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1304 ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); 1305 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1306 if (!inodes) { 1307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1308 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1309 } else { 1310 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1311 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1312 } 1313 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1314 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1315 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1317 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1318 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1319 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1320 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1323 PetscInt inodecount,inodelimit,*inodes; 1324 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1325 if (inodes) { 1326 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1327 } else { 1328 ierr = PetscViewerASCIIPrintf(viewer,"not using 
I-node (on process 0) routines\n");CHKERRQ(ierr); 1329 } 1330 PetscFunctionReturn(0); 1331 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1332 PetscFunctionReturn(0); 1333 } 1334 } else if (isbinary) { 1335 if (size == 1) { 1336 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1337 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1338 } else { 1339 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1340 } 1341 PetscFunctionReturn(0); 1342 } else if (iascii && size == 1) { 1343 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1344 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1345 PetscFunctionReturn(0); 1346 } else if (isdraw) { 1347 PetscDraw draw; 1348 PetscBool isnull; 1349 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1350 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1351 if (isnull) PetscFunctionReturn(0); 1352 } 1353 1354 { /* assemble the entire matrix onto first processor */ 1355 Mat A = NULL, Av; 1356 IS isrow,iscol; 1357 1358 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1359 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1360 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1361 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1362 /* The commented code uses MatCreateSubMatrices instead */ 1363 /* 1364 Mat *AA, A = NULL, Av; 1365 IS isrow,iscol; 1366 1367 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1368 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1369 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1370 if (rank == 0) { 1371 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1372 A = AA[0]; 1373 Av = AA[0]; 1374 } 1375 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1376 */ 1377 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1378 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1379 /* 1380 Everyone has to call to draw the matrix since the graphics waits are 1381 synchronized across all processors that share the PetscDraw object 1382 */ 1383 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1384 if (rank == 0) { 1385 if (((PetscObject)mat)->name) { 1386 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1387 } 1388 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1389 } 1390 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1391 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1392 ierr = MatDestroy(&A);CHKERRQ(ierr); 1393 } 1394 PetscFunctionReturn(0); 1395 } 1396 1397 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1398 { 1399 PetscErrorCode ierr; 1400 PetscBool iascii,isdraw,issocket,isbinary; 1401 1402 PetscFunctionBegin; 1403 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1404 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1405 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1406 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1407 if (iascii || isdraw || isbinary || issocket) { 1408 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1409 } 1410 PetscFunctionReturn(0); 1411 } 1412 1413 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1414 { 1415 
Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1416 PetscErrorCode ierr; 1417 Vec bb1 = NULL; 1418 PetscBool hasop; 1419 1420 PetscFunctionBegin; 1421 if (flag == SOR_APPLY_UPPER) { 1422 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1427 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1428 } 1429 1430 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1433 its--; 1434 } 1435 1436 while (its--) { 1437 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1438 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1439 1440 /* update rhs: bb1 = bb - B*x */ 1441 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1442 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1443 1444 /* local sweep */ 1445 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1446 } 1447 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1448 if (flag & SOR_ZERO_INITIAL_GUESS) { 1449 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1450 its--; 1451 } 1452 while (its--) { 1453 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1454 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1455 1456 /* update rhs: bb1 = bb - B*x */ 1457 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1458 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1459 1460 /* local sweep */ 1461 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1462 } 1463 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1464 if (flag & 
SOR_ZERO_INITIAL_GUESS) { 1465 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1466 its--; 1467 } 1468 while (its--) { 1469 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1470 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1471 1472 /* update rhs: bb1 = bb - B*x */ 1473 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1474 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1475 1476 /* local sweep */ 1477 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1478 } 1479 } else if (flag & SOR_EISENSTAT) { 1480 Vec xx1; 1481 1482 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1483 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1484 1485 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1486 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 if (!mat->diag) { 1488 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1489 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1490 } 1491 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1492 if (hasop) { 1493 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1494 } else { 1495 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1496 } 1497 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1498 1499 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1500 1501 /* local sweep */ 1502 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1503 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1504 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1505 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 
1506 1507 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1508 1509 matin->factorerrortype = mat->A->factorerrortype; 1510 PetscFunctionReturn(0); 1511 } 1512 1513 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1514 { 1515 Mat aA,aB,Aperm; 1516 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1517 PetscScalar *aa,*ba; 1518 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1519 PetscSF rowsf,sf; 1520 IS parcolp = NULL; 1521 PetscBool done; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1526 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1527 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1528 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1529 1530 /* Invert row permutation to find out where my rows should go */ 1531 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1532 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1533 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1534 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1535 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1536 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1537 1538 /* Invert column permutation to find out where my columns should go */ 1539 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1540 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1541 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1542 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1543 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1544 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1545 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1546 1547 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1548 ierr = 
ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1549 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1550 1551 /* Find out where my gcols should go */ 1552 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1553 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1554 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1555 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1556 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1557 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1558 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1559 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1560 1561 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1562 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1563 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1564 for (i=0; i<m; i++) { 1565 PetscInt row = rdest[i]; 1566 PetscMPIInt rowner; 1567 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1568 for (j=ai[i]; j<ai[i+1]; j++) { 1569 PetscInt col = cdest[aj[j]]; 1570 PetscMPIInt cowner; 1571 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1572 if (rowner == cowner) dnnz[i]++; 1573 else onnz[i]++; 1574 } 1575 for (j=bi[i]; j<bi[i+1]; j++) { 1576 PetscInt col = gcdest[bj[j]]; 1577 PetscMPIInt cowner; 1578 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1579 if (rowner == cowner) dnnz[i]++; 1580 else onnz[i]++; 1581 } 1582 } 1583 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1585 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1586 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1587 ierr = 
PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1588 1589 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1590 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1591 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1592 for (i=0; i<m; i++) { 1593 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1594 PetscInt j0,rowlen; 1595 rowlen = ai[i+1] - ai[i]; 1596 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1597 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1598 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1599 } 1600 rowlen = bi[i+1] - bi[i]; 1601 for (j0=j=0; j<rowlen; j0=j) { 1602 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1603 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1604 } 1605 } 1606 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1607 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1608 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1609 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1610 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1611 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1612 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1613 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1614 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1615 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1616 *B = Aperm; 1617 PetscFunctionReturn(0); 1618 } 1619 1620 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1621 { 1622 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1623 PetscErrorCode ierr; 1624 1625 PetscFunctionBegin; 1626 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1627 if (ghosts) *ghosts = 
aij->garray; 1628 PetscFunctionReturn(0); 1629 } 1630 1631 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1632 { 1633 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1634 Mat A = mat->A,B = mat->B; 1635 PetscErrorCode ierr; 1636 PetscLogDouble isend[5],irecv[5]; 1637 1638 PetscFunctionBegin; 1639 info->block_size = 1.0; 1640 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1641 1642 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1643 isend[3] = info->memory; isend[4] = info->mallocs; 1644 1645 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1646 1647 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1648 isend[3] += info->memory; isend[4] += info->mallocs; 1649 if (flag == MAT_LOCAL) { 1650 info->nz_used = isend[0]; 1651 info->nz_allocated = isend[1]; 1652 info->nz_unneeded = isend[2]; 1653 info->memory = isend[3]; 1654 info->mallocs = isend[4]; 1655 } else if (flag == MAT_GLOBAL_MAX) { 1656 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1657 1658 info->nz_used = irecv[0]; 1659 info->nz_allocated = irecv[1]; 1660 info->nz_unneeded = irecv[2]; 1661 info->memory = irecv[3]; 1662 info->mallocs = irecv[4]; 1663 } else if (flag == MAT_GLOBAL_SUM) { 1664 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } 1672 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1673 info->fill_ratio_needed = 0; 1674 info->factor_mallocs = 0; 1675 PetscFunctionReturn(0); 1676 } 1677 1678 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1679 { 1680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1681 PetscErrorCode ierr; 1682 1683 PetscFunctionBegin; 1684 
switch (op) { 1685 case MAT_NEW_NONZERO_LOCATIONS: 1686 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1687 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1688 case MAT_KEEP_NONZERO_PATTERN: 1689 case MAT_NEW_NONZERO_LOCATION_ERR: 1690 case MAT_USE_INODES: 1691 case MAT_IGNORE_ZERO_ENTRIES: 1692 case MAT_FORM_EXPLICIT_TRANSPOSE: 1693 MatCheckPreallocated(A,1); 1694 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1695 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1696 break; 1697 case MAT_ROW_ORIENTED: 1698 MatCheckPreallocated(A,1); 1699 a->roworiented = flg; 1700 1701 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1702 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1703 break; 1704 case MAT_FORCE_DIAGONAL_ENTRIES: 1705 case MAT_SORTED_FULL: 1706 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1707 break; 1708 case MAT_IGNORE_OFF_PROC_ENTRIES: 1709 a->donotstash = flg; 1710 break; 1711 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1712 case MAT_SPD: 1713 case MAT_SYMMETRIC: 1714 case MAT_STRUCTURALLY_SYMMETRIC: 1715 case MAT_HERMITIAN: 1716 case MAT_SYMMETRY_ETERNAL: 1717 break; 1718 case MAT_SUBMAT_SINGLEIS: 1719 A->submat_singleis = flg; 1720 break; 1721 case MAT_STRUCTURE_ONLY: 1722 /* The option is handled directly by MatSetOption() */ 1723 break; 1724 default: 1725 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1726 } 1727 PetscFunctionReturn(0); 1728 } 1729 1730 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1731 { 1732 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1733 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1734 PetscErrorCode ierr; 1735 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1736 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1737 PetscInt *cmap,*idx_p; 1738 1739 PetscFunctionBegin; 1740 if (mat->getrowactive) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1741 mat->getrowactive = PETSC_TRUE; 1742 1743 if (!mat->rowvalues && (idx || v)) { 1744 /* 1745 allocate enough space to hold information from the longest row. 1746 */ 1747 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1748 PetscInt max = 1,tmp; 1749 for (i=0; i<matin->rmap->n; i++) { 1750 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1751 if (max < tmp) max = tmp; 1752 } 1753 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1754 } 1755 1756 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1757 lrow = row - rstart; 1758 1759 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1760 if (!v) {pvA = NULL; pvB = NULL;} 1761 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1762 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1763 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1764 nztot = nzA + nzB; 1765 1766 cmap = mat->garray; 1767 if (v || idx) { 1768 if (nztot) { 1769 /* Sort by increasing column numbers, assuming A and B already sorted */ 1770 PetscInt imark = -1; 1771 if (v) { 1772 *v = v_p = mat->rowvalues; 1773 for (i=0; i<nzB; i++) { 1774 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1775 else break; 1776 } 1777 imark = i; 1778 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1779 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1780 } 1781 if (idx) { 1782 *idx = idx_p = mat->rowindices; 1783 if (imark > -1) { 1784 for (i=0; i<imark; i++) { 1785 idx_p[i] = cmap[cworkB[i]]; 1786 } 1787 } else { 1788 for (i=0; i<nzB; i++) { 1789 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1790 else break; 1791 } 1792 imark = i; 1793 } 1794 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1795 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1796 } 1797 } else { 1798 if (idx) *idx = NULL; 1799 if (v) *v = NULL; 
1800 } 1801 } 1802 *nz = nztot; 1803 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1804 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1805 PetscFunctionReturn(0); 1806 } 1807 1808 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1809 { 1810 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1811 1812 PetscFunctionBegin; 1813 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1814 aij->getrowactive = PETSC_FALSE; 1815 PetscFunctionReturn(0); 1816 } 1817 1818 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1819 { 1820 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1821 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1822 PetscErrorCode ierr; 1823 PetscInt i,j,cstart = mat->cmap->rstart; 1824 PetscReal sum = 0.0; 1825 MatScalar *v; 1826 1827 PetscFunctionBegin; 1828 if (aij->size == 1) { 1829 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1830 } else { 1831 if (type == NORM_FROBENIUS) { 1832 v = amat->a; 1833 for (i=0; i<amat->nz; i++) { 1834 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1835 } 1836 v = bmat->a; 1837 for (i=0; i<bmat->nz; i++) { 1838 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1839 } 1840 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1841 *norm = PetscSqrtReal(*norm); 1842 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1843 } else if (type == NORM_1) { /* max column norm */ 1844 PetscReal *tmp,*tmp2; 1845 PetscInt *jj,*garray = aij->garray; 1846 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1847 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1848 *norm = 0.0; 1849 v = amat->a; jj = amat->j; 1850 for (j=0; j<amat->nz; j++) { 1851 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1852 } 1853 v = bmat->a; jj = bmat->j; 1854 for (j=0; j<bmat->nz; j++) { 
1855 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1856 } 1857 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1858 for (j=0; j<mat->cmap->N; j++) { 1859 if (tmp2[j] > *norm) *norm = tmp2[j]; 1860 } 1861 ierr = PetscFree(tmp);CHKERRQ(ierr); 1862 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1863 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1864 } else if (type == NORM_INFINITY) { /* max row norm */ 1865 PetscReal ntemp = 0.0; 1866 for (j=0; j<aij->A->rmap->n; j++) { 1867 v = amat->a + amat->i[j]; 1868 sum = 0.0; 1869 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1870 sum += PetscAbsScalar(*v); v++; 1871 } 1872 v = bmat->a + bmat->i[j]; 1873 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1874 sum += PetscAbsScalar(*v); v++; 1875 } 1876 if (sum > ntemp) ntemp = sum; 1877 } 1878 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1879 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1880 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1881 } 1882 PetscFunctionReturn(0); 1883 } 1884 1885 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1886 { 1887 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1888 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1889 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1890 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1891 PetscErrorCode ierr; 1892 Mat B,A_diag,*B_diag; 1893 const MatScalar *pbv,*bv; 1894 1895 PetscFunctionBegin; 1896 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1897 ai = Aloc->i; aj = Aloc->j; 1898 bi = Bloc->i; bj = Bloc->j; 1899 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1900 PetscInt *d_nnz,*g_nnz,*o_nnz; 1901 PetscSFNode *oloc; 1902 PETSC_UNUSED PetscSF sf; 1903 1904 ierr = 
PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1905 /* compute d_nnz for preallocation */ 1906 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1907 for (i=0; i<ai[ma]; i++) { 1908 d_nnz[aj[i]]++; 1909 } 1910 /* compute local off-diagonal contributions */ 1911 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1912 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1913 /* map those to global */ 1914 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1915 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1916 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1917 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1918 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1919 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1920 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1921 1922 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1923 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1924 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1925 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1926 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1927 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1928 } else { 1929 B = *matout; 1930 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1931 } 1932 1933 b = (Mat_MPIAIJ*)B->data; 1934 A_diag = a->A; 1935 B_diag = &b->A; 1936 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1937 A_diag_ncol = A_diag->cmap->N; 1938 B_diag_ilen = sub_B_diag->ilen; 1939 B_diag_i = sub_B_diag->i; 1940 1941 /* Set ilen for diagonal of B */ 1942 for (i=0; i<A_diag_ncol; i++) { 1943 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1944 } 1945 1946 /* Transpose the diagonal part of the matrix. 
In contrast to the offdiagonal part, this can be done 1947 very quickly (=without using MatSetValues), because all writes are local. */ 1948 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1949 1950 /* copy over the B part */ 1951 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1952 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1953 pbv = bv; 1954 row = A->rmap->rstart; 1955 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1956 cols_tmp = cols; 1957 for (i=0; i<mb; i++) { 1958 ncol = bi[i+1]-bi[i]; 1959 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1960 row++; 1961 pbv += ncol; cols_tmp += ncol; 1962 } 1963 ierr = PetscFree(cols);CHKERRQ(ierr); 1964 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1965 1966 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1967 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1968 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1969 *matout = B; 1970 } else { 1971 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1972 } 1973 PetscFunctionReturn(0); 1974 } 1975 1976 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1977 { 1978 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1979 Mat a = aij->A,b = aij->B; 1980 PetscErrorCode ierr; 1981 PetscInt s1,s2,s3; 1982 1983 PetscFunctionBegin; 1984 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1985 if (rr) { 1986 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1987 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1988 /* Overlap communication with computation. 
*/ 1989 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1990 } 1991 if (ll) { 1992 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1993 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1994 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1995 } 1996 /* scale the diagonal block */ 1997 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1998 1999 if (rr) { 2000 /* Do a scatter end and then right scale the off-diagonal block */ 2001 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2002 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2003 } 2004 PetscFunctionReturn(0); 2005 } 2006 2007 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2008 { 2009 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2010 PetscErrorCode ierr; 2011 2012 PetscFunctionBegin; 2013 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2014 PetscFunctionReturn(0); 2015 } 2016 2017 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2018 { 2019 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2020 Mat a,b,c,d; 2021 PetscBool flg; 2022 PetscErrorCode ierr; 2023 2024 PetscFunctionBegin; 2025 a = matA->A; b = matA->B; 2026 c = matB->A; d = matB->B; 2027 2028 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2029 if (flg) { 2030 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2031 } 2032 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2033 PetscFunctionReturn(0); 2034 } 2035 2036 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2037 { 2038 PetscErrorCode ierr; 2039 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2040 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2041 2042 PetscFunctionBegin; 2043 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2044 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2045 /* because of the column compression in the off-processor part of the matrix a->B, 2046 the number of columns in a->B and b->B may be different, hence we cannot call 2047 the MatCopy() directly on the two parts. If need be, we can provide a more 2048 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2049 then copying the submatrices */ 2050 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2051 } else { 2052 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2053 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2054 } 2055 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2056 PetscFunctionReturn(0); 2057 } 2058 2059 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2060 { 2061 PetscErrorCode ierr; 2062 2063 PetscFunctionBegin; 2064 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /* 2069 Computes the number of nonzeros per row needed for preallocation when X and Y 2070 have different nonzero structure. 
2071 */ 2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2073 { 2074 PetscInt i,j,k,nzx,nzy; 2075 2076 PetscFunctionBegin; 2077 /* Set the number of nonzeros in the new matrix */ 2078 for (i=0; i<m; i++) { 2079 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2080 nzx = xi[i+1] - xi[i]; 2081 nzy = yi[i+1] - yi[i]; 2082 nnz[i] = 0; 2083 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2084 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2086 nnz[i]++; 2087 } 2088 for (; k<nzy; k++) nnz[i]++; 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2095 { 2096 PetscErrorCode ierr; 2097 PetscInt m = Y->rmap->N; 2098 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2099 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2100 2101 PetscFunctionBegin; 2102 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2103 PetscFunctionReturn(0); 2104 } 2105 2106 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2107 { 2108 PetscErrorCode ierr; 2109 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2110 2111 PetscFunctionBegin; 2112 if (str == SAME_NONZERO_PATTERN) { 2113 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2114 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2115 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2116 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2117 } else { 2118 Mat B; 2119 PetscInt *nnz_d,*nnz_o; 2120 2121 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2122 ierr = 
PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2123 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2124 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2125 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2126 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2127 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2128 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2129 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2130 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2131 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2132 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2133 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2134 } 2135 PetscFunctionReturn(0); 2136 } 2137 2138 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2139 2140 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2141 { 2142 #if defined(PETSC_USE_COMPLEX) 2143 PetscErrorCode ierr; 2144 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2145 2146 PetscFunctionBegin; 2147 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2148 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2149 #else 2150 PetscFunctionBegin; 2151 #endif 2152 PetscFunctionReturn(0); 2153 } 2154 2155 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2156 { 2157 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2158 PetscErrorCode ierr; 2159 2160 PetscFunctionBegin; 2161 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2162 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2167 { 2168 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2173 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2178 { 2179 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 2180 PetscErrorCode ierr; 2181 PetscInt i,*idxb = NULL,m = A->rmap->n; 2182 PetscScalar *va,*vv; 2183 Vec vB,vA; 2184 const PetscScalar *vb; 2185 2186 PetscFunctionBegin; 2187 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2188 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2189 2190 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2191 if (idx) { 2192 for (i=0; i<m; i++) { 2193 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2194 } 2195 } 2196 2197 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2198 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2199 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2200 2201 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2202 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2203 for (i=0; i<m; i++) { 2204 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2205 vv[i] = vb[i]; 2206 if (idx) idx[i] = a->garray[idxb[i]]; 2207 } else { 2208 vv[i] = va[i]; 2209 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2210 idx[i] = a->garray[idxb[i]]; 2211 } 2212 } 2213 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2214 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2215 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2216 ierr = PetscFree(idxb);CHKERRQ(ierr); 2217 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2218 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2219 PetscFunctionReturn(0); 2220 } 2221 2222 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2223 { 2224 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2225 PetscInt m = A->rmap->n,n = A->cmap->n; 2226 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2227 PetscInt *cmap = mat->garray; 2228 PetscInt *diagIdx, *offdiagIdx; 2229 Vec diagV, offdiagV; 2230 PetscScalar *a, *diagA, *offdiagA; 2231 const PetscScalar *ba,*bav; 2232 PetscInt r,j,col,ncols,*bi,*bj; 2233 PetscErrorCode ierr; 2234 Mat B = mat->B; 2235 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2236 2237 
PetscFunctionBegin; 2238 /* When a process holds entire A and other processes have no entry */ 2239 if (A->cmap->N == n) { 2240 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2241 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2242 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2243 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2244 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2245 PetscFunctionReturn(0); 2246 } else if (n == 0) { 2247 if (m) { 2248 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2249 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2250 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2251 } 2252 PetscFunctionReturn(0); 2253 } 2254 2255 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2256 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2257 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2258 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2259 2260 /* Get offdiagIdx[] for implicit 0.0 */ 2261 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2262 ba = bav; 2263 bi = b->i; 2264 bj = b->j; 2265 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2266 for (r = 0; r < m; r++) { 2267 ncols = bi[r+1] - bi[r]; 2268 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2269 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2270 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2271 offdiagA[r] = 0.0; 2272 2273 /* Find first hole in the cmap */ 2274 for (j=0; j<ncols; j++) { 2275 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2276 if (col > j && j < cstart) { 2277 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2278 break; 2279 } else if (col > j + n && j >= cstart) { 2280 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2281 break; 2282 } 2283 } 2284 if (j == ncols && ncols < A->cmap->N - n) { 2285 /* a hole is outside compressed Bcols */ 2286 if (ncols == 
0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* fold the stored off-diagonal entries into the running min-abs for this row */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal results; on a tie the smaller global column wins */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetRowMin_MPIAIJ - For each local row, computes the minimum entry (by real part)
   over the whole parallel row and optionally its global column index in idx[].
   Columns not stored in the off-diagonal part B count as implicit 0.0 entries,
   so a sparse B row can never yield a minimum above 0.0.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;          /* global columns of B's compressed columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* no local columns at all: every row minimum is the identity for min */
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): compares cmap[bj[j]] against the running position j (shifted by n
         once past cstart) to locate the first global column absent from B's pattern;
         assumes cmap[] is sorted increasing -- verify against the RowMinAbs/RowMaxAbs variants */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* fold stored off-diagonal entries into the running minimum for this row */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal results; on a tie the smaller global column wins */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   MatGetRowMax_MPIAIJ - For each local row, computes the maximum entry (by real part)
   over the whole parallel row and optionally its global column index in idx[].
   Columns not stored in the off-diagonal part B count as implicit 0.0 entries,
   so a sparse B row can never yield a maximum below 0.0.
   Mirror image of MatGetRowMin_MPIAIJ (comparisons reversed; PETSC_MIN_REAL identity).
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;          /* global columns of B's compressed columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* no local columns: every row maximum is the identity for max */
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  /* NOTE(review): the Min variant uses PetscCalloc2 here; Malloc2 relies on every
     offdiagIdx[r] being assigned below -- TODO confirm intentional */
  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* fold stored off-diagonal entries into the running maximum for this row */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal results; on a tie the smaller global column wins */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetSeqNonzeroStructure_MPIAIJ - Gathers the nonzero pattern of the parallel
   matrix into a sequential matrix (values are not copied: MAT_DO_NOT_GET_VALUES).
   Ownership of *newmat passes to the caller; the temporary Mat array is freed here.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   MatInvertBlockDiagonal_MPIAIJ - Delegates block-diagonal inversion to the local
   diagonal part a->A and propagates any factorization-error state to the parent.
*/
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
   MatSetRandom_MPIAIJ - Fills the matrix with random values. For an unassembled
   (but preallocated) matrix, random values in B must avoid the local column range
   [cmap->rstart,cmap->rend) since those columns belong to the diagonal block A.
*/
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  if (x->assembled) {
    ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  } else {
    ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Type-specific implementation: swaps the increaseoverlap function pointer. */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PetscTryMethod: silently a no-op for types that do not implement the method */
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Processes the -mat_increase_overlap_scalable option for MATMPIAIJ. */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatShift_MPIAIJ - Y += a*I. Ensures the diagonal part is preallocated (one entry
   per row suffices for the shift) before delegating to MatShift_Basic; preserves
   the user's nonew setting across the re-preallocation.
*/
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij  = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMissingDiagonal_MPIAIJ - Checks the local diagonal block for a missing diagonal
   entry; *d (if requested) is converted from a local to a global row number.
   Only valid for locally square layouts.
*/
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d  += rstart;
  }
  PetscFunctionReturn(0);
}
/* Delegates variable-block-diagonal inversion to the local diagonal part a->A. */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ; the /*nn*/ /* markers give the slot index
   in struct _MatOps -- the order is positional and must not be changed. */
static struct _MatOps MatOps_Values = {
  /*  0*/ MatSetValues_MPIAIJ, MatGetRow_MPIAIJ, MatRestoreRow_MPIAIJ, MatMult_MPIAIJ,
  /*  4*/ MatMultAdd_MPIAIJ, MatMultTranspose_MPIAIJ, MatMultTransposeAdd_MPIAIJ, NULL, NULL, NULL,
  /* 10*/ NULL, NULL, NULL, MatSOR_MPIAIJ, MatTranspose_MPIAIJ,
  /* 15*/ MatGetInfo_MPIAIJ, MatEqual_MPIAIJ, MatGetDiagonal_MPIAIJ, MatDiagonalScale_MPIAIJ, MatNorm_MPIAIJ,
  /* 20*/ MatAssemblyBegin_MPIAIJ, MatAssemblyEnd_MPIAIJ, MatSetOption_MPIAIJ, MatZeroEntries_MPIAIJ,
  /* 24*/ MatZeroRows_MPIAIJ, NULL, NULL, NULL, NULL,
  /* 29*/ MatSetUp_MPIAIJ, NULL, NULL, MatGetDiagonalBlock_MPIAIJ, NULL,
  /* 34*/ MatDuplicate_MPIAIJ, NULL, NULL, NULL, NULL,
  /* 39*/ MatAXPY_MPIAIJ, MatCreateSubMatrices_MPIAIJ, MatIncreaseOverlap_MPIAIJ, MatGetValues_MPIAIJ, MatCopy_MPIAIJ,
  /* 44*/ MatGetRowMax_MPIAIJ, MatScale_MPIAIJ, MatShift_MPIAIJ, MatDiagonalSet_MPIAIJ, MatZeroRowsColumns_MPIAIJ,
  /* 49*/ MatSetRandom_MPIAIJ, NULL, NULL, NULL, NULL,
  /* 54*/ MatFDColoringCreate_MPIXAIJ, NULL, MatSetUnfactored_MPIAIJ, MatPermute_MPIAIJ, NULL,
  /* 59*/ MatCreateSubMatrix_MPIAIJ, MatDestroy_MPIAIJ, MatView_MPIAIJ, NULL, NULL,
  /* 64*/ NULL, MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, NULL, NULL, NULL,
  /* 69*/ MatGetRowMaxAbs_MPIAIJ, MatGetRowMinAbs_MPIAIJ, NULL, NULL, NULL, NULL,
  /* 75*/ MatFDColoringApply_AIJ, MatSetFromOptions_MPIAIJ, NULL, NULL, MatFindZeroDiagonals_MPIAIJ,
  /* 80*/ NULL, NULL, NULL,
  /* 83*/ MatLoad_MPIAIJ, MatIsSymmetric_MPIAIJ, NULL, NULL, NULL, NULL,
  /* 89*/ NULL, NULL, MatMatMultNumeric_MPIAIJ_MPIAIJ, NULL, NULL,
  /* 94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, NULL, NULL, NULL, MatBindToCPU_MPIAIJ,
  /* 99*/ MatProductSetFromOptions_MPIAIJ, NULL, NULL, MatConjugate_MPIAIJ, NULL,
  /*104*/ MatSetValuesRow_MPIAIJ, MatRealPart_MPIAIJ, MatImaginaryPart_MPIAIJ, NULL, NULL,
  /*109*/ NULL, NULL, MatGetRowMin_MPIAIJ, NULL, MatMissingDiagonal_MPIAIJ,
  /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, NULL, MatGetGhosts_MPIAIJ, NULL, NULL,
  /*119*/ MatMultDiagonalBlock_MPIAIJ, NULL, NULL, NULL, MatGetMultiProcBlock_MPIAIJ,
  /*124*/ MatFindNonzeroRows_MPIAIJ, MatGetColumnReductions_MPIAIJ, MatInvertBlockDiagonal_MPIAIJ, MatInvertVariableBlockDiagonal_MPIAIJ, MatCreateSubMatricesMPI_MPIAIJ,
  /*129*/ NULL, NULL, NULL, MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, NULL,
  /*134*/ NULL, NULL, NULL, NULL, NULL,
  /*139*/ MatSetBlockSizes_MPIAIJ, NULL, NULL, MatFDColoringSetUp_MPIXAIJ, MatFindOffBlockDiagonalEntries_MPIAIJ, MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
  /*145*/ NULL, NULL, NULL
};

/* ----------------------------------------------------------------------------------------*/

/* Saves the current values of both sequential parts for later MatRetrieveValues(). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* Restores the values previously saved with MatStoreValues() in both parts. */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMPIAIJSetPreallocation_MPIAIJ - Preallocates the diagonal (A) and off-diagonal (B)
   sequential parts. Any previous column map, ghost array, local work vector and scatter
   are discarded; B is destroyed and recreated because its column count can change.
   On a single process B gets zero columns (there is no off-diagonal part).
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    /* first preallocation: the diagonal part must be created as well */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   MatResetPreallocation_MPIAIJ - Resets both sequential parts to their preallocated
   (unassembled) state, dropping the column map, ghost array, local vector and scatter
   so they are rebuilt at the next assembly.
*/
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
/*
   MatDuplicate_MPIAIJ - Creates a new matrix with the same layout and nonzero
   structure as matin; values are copied or not according to cpvalues. The colmap,
   garray, local vector and scatter are deep-copied when present so the duplicate
   is immediately usable.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-object row caches are not copied; they are rebuilt on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx) {
    ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  }
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Dispatches loading to the binary or HDF5 reader based on the viewer type. */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool      isbinary, ishdf5;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
  if (isbinary) {
    ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}
/*
   MatLoad_MPIAIJ_Binary - Reads a matrix from a PETSc binary viewer: header
   (classid, M, N, nz), then the per-row counts (converted in place to a CSR
   row-offset array), then global column indices and values, which are handed
   to MatMPIAIJSetPreallocationCSR(). Validates the header against the
   (possibly preset) matrix sizes and the global nonzero count.
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
  if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
  /* nz < 0 marks a special on-disk format (e.g. dense) that this reader cannot handle */
  if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);

  /* read in row lengths and build row indices */
  ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  rowidxs[0] = 0;
  /* prefix-sum the row lengths into CSR row offsets */
  for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
  if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  /* store matrix indices and values */
  ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* Not scalable because of ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* all ranks must agree before the all-columns shortcut can be taken */
  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
            (allocated here; ownership passes to the caller, which frees it with PetscFree)
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;   /* lvec is borrowed from the matrix, not destroyed here */
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  ierr  = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr  = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    /* entries the scatter left at -1.0 are columns not selected by iscol */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3209 /* cannot ensure iscol_o has same blocksize as iscol! */ 3210 3211 ierr = PetscFree(idx);CHKERRQ(ierr); 3212 *garray = cmap1; 3213 3214 ierr = VecDestroy(&x);CHKERRQ(ierr); 3215 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3216 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3217 PetscFunctionReturn(0); 3218 } 3219 3220 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3221 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3222 { 3223 PetscErrorCode ierr; 3224 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3225 Mat M = NULL; 3226 MPI_Comm comm; 3227 IS iscol_d,isrow_d,iscol_o; 3228 Mat Asub = NULL,Bsub = NULL; 3229 PetscInt n; 3230 3231 PetscFunctionBegin; 3232 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3233 3234 if (call == MAT_REUSE_MATRIX) { 3235 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3236 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3237 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3238 3239 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3240 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3241 3242 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3243 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3244 3245 /* Update diagonal and off-diagonal portions of submat */ 3246 asub = (Mat_MPIAIJ*)(*submat)->data; 3247 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3248 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3249 if (n) { 
3250 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3251 } 3252 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3253 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3254 3255 } else { /* call == MAT_INITIAL_MATRIX) */ 3256 const PetscInt *garray; 3257 PetscInt BsubN; 3258 3259 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3260 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3261 3262 /* Create local submatrices Asub and Bsub */ 3263 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3264 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3265 3266 /* Create submatrix M */ 3267 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3268 3269 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3270 asub = (Mat_MPIAIJ*)M->data; 3271 3272 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3273 n = asub->B->cmap->N; 3274 if (BsubN > n) { 3275 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3276 const PetscInt *idx; 3277 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3278 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3279 3280 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3281 j = 0; 3282 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3283 for (i=0; i<n; i++) { 3284 if (j >= BsubN) break; 3285 while (subgarray[i] > garray[j]) j++; 3286 3287 if (subgarray[i] == garray[j]) { 3288 idx_new[i] = idx[j++]; 3289 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3290 } 3291 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3292 3293 
ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3294 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3295 3296 } else if (BsubN < n) { 3297 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3298 } 3299 3300 ierr = PetscFree(garray);CHKERRQ(ierr); 3301 *submat = M; 3302 3303 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3304 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3305 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3306 3307 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3308 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3309 3310 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3311 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3312 } 3313 PetscFunctionReturn(0); 3314 } 3315 3316 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3317 { 3318 PetscErrorCode ierr; 3319 IS iscol_local=NULL,isrow_d; 3320 PetscInt csize; 3321 PetscInt n,i,j,start,end; 3322 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3323 MPI_Comm comm; 3324 3325 PetscFunctionBegin; 3326 /* If isrow has same processor distribution as mat, 3327 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3328 if (call == MAT_REUSE_MATRIX) { 3329 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3330 if (isrow_d) { 3331 sameRowDist = PETSC_TRUE; 3332 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3333 } else { 3334 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3335 if (iscol_local) { 3336 sameRowDist = PETSC_TRUE; 3337 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3338 } 3339 } 3340 } else { 3341 /* Check if isrow has same processor distribution as mat */ 3342 
sameDist[0] = PETSC_FALSE; 3343 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3344 if (!n) { 3345 sameDist[0] = PETSC_TRUE; 3346 } else { 3347 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3348 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3349 if (i >= start && j < end) { 3350 sameDist[0] = PETSC_TRUE; 3351 } 3352 } 3353 3354 /* Check if iscol has same processor distribution as mat */ 3355 sameDist[1] = PETSC_FALSE; 3356 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3357 if (!n) { 3358 sameDist[1] = PETSC_TRUE; 3359 } else { 3360 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3361 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3362 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3363 } 3364 3365 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3366 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3367 sameRowDist = tsameDist[0]; 3368 } 3369 3370 if (sameRowDist) { 3371 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3372 /* isrow and iscol have same processor distribution as mat */ 3373 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3374 PetscFunctionReturn(0); 3375 } else { /* sameRowDist */ 3376 /* isrow has same processor distribution as mat */ 3377 if (call == MAT_INITIAL_MATRIX) { 3378 PetscBool sorted; 3379 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3380 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3381 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3382 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3383 3384 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3385 if (sorted) { 3386 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3387 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3388 PetscFunctionReturn(0); 3389 } 3390 } else { /* call == MAT_REUSE_MATRIX */ 3391 IS iscol_sub; 3392 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3393 if (iscol_sub) { 3394 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3395 PetscFunctionReturn(0); 3396 } 3397 } 3398 } 3399 } 3400 3401 /* General case: iscol -> iscol_local which has global size of iscol */ 3402 if (call == MAT_REUSE_MATRIX) { 3403 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3404 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3405 } else { 3406 if (!iscol_local) { 3407 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3408 } 3409 } 3410 3411 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3412 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3413 3414 if (call == MAT_INITIAL_MATRIX) { 3415 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3416 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3417 } 3418 PetscFunctionReturn(0); 3419 } 3420 3421 /*@C 3422 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3423 and "off-diagonal" part of the matrix in CSR format. 3424 3425 Collective 3426 3427 Input Parameters: 3428 + comm - MPI communicator 3429 . A - "diagonal" portion of matrix 3430 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3431 - garray - global index of B columns 3432 3433 Output Parameter: 3434 . 
mat - the matrix, with input A as its local diagonal matrix 3435 Level: advanced 3436 3437 Notes: 3438 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3439 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3440 3441 .seealso: MatCreateMPIAIJWithSplitArrays() 3442 @*/ 3443 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3444 { 3445 PetscErrorCode ierr; 3446 Mat_MPIAIJ *maij; 3447 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3448 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3449 const PetscScalar *oa; 3450 Mat Bnew; 3451 PetscInt m,n,N; 3452 3453 PetscFunctionBegin; 3454 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3455 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3456 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3457 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3458 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3459 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3460 3461 /* Get global columns of mat */ 3462 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3463 3464 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3465 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3466 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3467 maij = (Mat_MPIAIJ*)(*mat)->data; 3468 3469 (*mat)->preallocated = PETSC_TRUE; 3470 3471 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3472 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3473 3474 /* Set A as diagonal portion of *mat */ 3475 maij->A = A; 3476 3477 nz = oi[m]; 3478 for (i=0; i<nz; i++) { 3479 col = oj[i]; 3480 
oj[i] = garray[col]; 3481 } 3482 3483 /* Set Bnew as off-diagonal portion of *mat */ 3484 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3485 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3486 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3487 bnew = (Mat_SeqAIJ*)Bnew->data; 3488 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3489 maij->B = Bnew; 3490 3491 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3492 3493 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3494 b->free_a = PETSC_FALSE; 3495 b->free_ij = PETSC_FALSE; 3496 ierr = MatDestroy(&B);CHKERRQ(ierr); 3497 3498 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3499 bnew->free_a = PETSC_TRUE; 3500 bnew->free_ij = PETSC_TRUE; 3501 3502 /* condense columns of maij->B */ 3503 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3504 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3505 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3506 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3507 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3508 PetscFunctionReturn(0); 3509 } 3510 3511 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3512 3513 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3514 { 3515 PetscErrorCode ierr; 3516 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3517 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3518 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3519 Mat M,Msub,B=a->B; 3520 MatScalar *aa; 3521 Mat_SeqAIJ *aij; 3522 PetscInt *garray = a->garray,*colsub,Ncols; 3523 PetscInt 
count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the cached sequential submatrix and index sets composed during the MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr  = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr  = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr  = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      /* keep only the requested columns this process actually touches: its diagonal
         column range plus columns present in garray (off-diagonal block) */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* dlens and olens share one allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj   = aij->j;
  ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; /* translate Msub's local columns to newmat's global columns */
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  /* NOTE(review): aa has been advanced past the start of the array here; restoring the advanced
     pointer looks suspicious — confirm MatSeqAIJRestoreArrayRead() does not require the original pointer */
  ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* allcolumns must hold on every process to take the fast path */
  ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix cached on *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible; first (n % size) ranks get one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* dlens and olens share one allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* NOTE(review): aa is advanced past the array start here; confirm restoring the advanced pointer is acceptable */
  ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr =
PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3875 3876 if (PetscDefined(USE_DEBUG)) { 3877 for (i=0; i<m; i++) { 3878 nnz = Ii[i+1]- Ii[i]; 3879 JJ = J + Ii[i]; 3880 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3881 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3882 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3883 } 3884 } 3885 3886 for (i=0; i<m; i++) { 3887 nnz = Ii[i+1]- Ii[i]; 3888 JJ = J + Ii[i]; 3889 nnz_max = PetscMax(nnz_max,nnz); 3890 d = 0; 3891 for (j=0; j<nnz; j++) { 3892 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3893 } 3894 d_nnz[i] = d; 3895 o_nnz[i] = nnz - d; 3896 } 3897 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3898 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3899 3900 for (i=0; i<m; i++) { 3901 ii = i + rstart; 3902 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3903 } 3904 nooffprocentries = B->nooffprocentries; 3905 B->nooffprocentries = PETSC_TRUE; 3906 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3907 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3908 B->nooffprocentries = nooffprocentries; 3909 3910 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3911 PetscFunctionReturn(0); 3912 } 3913 3914 /*@ 3915 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3916 (the default parallel PETSc format). 3917 3918 Collective 3919 3920 Input Parameters: 3921 + B - the matrix 3922 . i - the indices into j for the start of each local row (starts with zero) 3923 . 
j - the column indices for each local row (starts with zero) 3924 - v - optional values in the matrix 3925 3926 Level: developer 3927 3928 Notes: 3929 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3930 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3931 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3932 3933 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3934 3935 The format which is used for the sparse matrix input, is equivalent to a 3936 row-major ordering.. i.e for the following matrix, the input data expected is 3937 as shown 3938 3939 $ 1 0 0 3940 $ 2 0 3 P0 3941 $ ------- 3942 $ 4 5 6 P1 3943 $ 3944 $ Process0 [P0]: rows_owned=[0,1] 3945 $ i = {0,1,3} [size = nrow+1 = 2+1] 3946 $ j = {0,0,2} [size = 3] 3947 $ v = {1,2,3} [size = 3] 3948 $ 3949 $ Process1 [P1]: rows_owned=[2] 3950 $ i = {0,3} [size = nrow+1 = 1+1] 3951 $ j = {0,1,2} [size = 3] 3952 $ v = {4,5,6} [size = 3] 3953 3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3955 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3956 @*/ 3957 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3958 { 3959 PetscErrorCode ierr; 3960 3961 PetscFunctionBegin; 3962 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3963 PetscFunctionReturn(0); 3964 } 3965 3966 /*@C 3967 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3968 (the default parallel PETSc format). For good matrix assembly performance 3969 the user should preallocate the matrix storage by setting the parameters 3970 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extraction the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   the this processor, and c1-c2 is range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation registered as "MatMPIAIJSetPreallocation_C"; a no-op for other types */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

     The format which is used for the sparse matrix input, is equivalent to a
     row-major ordering, i.e. for the following matrix, the input data expected is
     as shown

     Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays()

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* CSR row-pointer array must start at 0 and the local row count must be explicit */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* the CSR triple is copied into the matrix; the caller keeps ownership of i,j,a */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain the local
   rows in standard CSR format.
Only the numerical values are updated the other arrays must be identical 4178 4179 Collective 4180 4181 Input Parameters: 4182 + mat - the matrix 4183 . m - number of local rows (Cannot be PETSC_DECIDE) 4184 . n - This value should be the same as the local size used in creating the 4185 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4186 calculated if N is given) For square matrices n is almost always m. 4187 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4188 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4189 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4190 . J - column indices 4191 - v - matrix values 4192 4193 Level: intermediate 4194 4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4196 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4197 @*/ 4198 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4199 { 4200 PetscErrorCode ierr; 4201 PetscInt cstart,nnz,i,j; 4202 PetscInt *ld; 4203 PetscBool nooffprocentries; 4204 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4205 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4206 PetscScalar *ad = Ad->a, *ao = Ao->a; 4207 const PetscInt *Adi = Ad->i; 4208 PetscInt ldi,Iii,md; 4209 4210 PetscFunctionBegin; 4211 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4212 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4213 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4214 if (n != mat->cmap->n) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4215 4216 cstart = mat->cmap->rstart; 4217 if (!Aij->ld) { 4218 /* count number of entries below block diagonal */ 4219 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4220 Aij->ld = ld; 4221 for (i=0; i<m; i++) { 4222 nnz = Ii[i+1]- Ii[i]; 4223 j = 0; 4224 while (J[j] < cstart && j < nnz) {j++;} 4225 J += nnz; 4226 ld[i] = j; 4227 } 4228 } else { 4229 ld = Aij->ld; 4230 } 4231 4232 for (i=0; i<m; i++) { 4233 nnz = Ii[i+1]- Ii[i]; 4234 Iii = Ii[i]; 4235 ldi = ld[i]; 4236 md = Adi[i+1]-Adi[i]; 4237 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4238 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4239 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4240 ad += md; 4241 ao += nnz - md; 4242 } 4243 nooffprocentries = mat->nooffprocentries; 4244 mat->nooffprocentries = PETSC_TRUE; 4245 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4246 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4247 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4248 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4249 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4250 mat->nooffprocentries = nooffprocentries; 4251 PetscFunctionReturn(0); 4252 } 4253 4254 /*@C 4255 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4256 (the default parallel PETSc format). For good matrix assembly performance 4257 the user should preallocate the matrix storage by setting the parameters 4258 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4259 performance can be increased by more than a factor of 50. 4260 4261 Collective 4262 4263 Input Parameters: 4264 + comm - MPI communicator 4265 . 
   m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e. each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e. diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

$     MatCreate(...,&A);
$     MatSetType(A,MATMPIAIJ);
$     MatSetSizes(A, m,n,M,N);
$     MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
        See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
        Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  /* single-process communicators get a SEQAIJ matrix (see Notes above) */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

   Not collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  
Ao - The local off-diagonal block as a SeqAIJ matrix 4460 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4461 4462 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4463 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4464 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4465 local column numbers to global column numbers in the original matrix. 4466 4467 Level: intermediate 4468 4469 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4470 @*/ 4471 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4472 { 4473 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4474 PetscBool flg; 4475 PetscErrorCode ierr; 4476 4477 PetscFunctionBegin; 4478 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4479 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4480 if (Ad) *Ad = a->A; 4481 if (Ao) *Ao = a->B; 4482 if (colmap) *colmap = a->garray; 4483 PetscFunctionReturn(0); 4484 } 4485 4486 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4487 { 4488 PetscErrorCode ierr; 4489 PetscInt m,N,i,rstart,nnz,Ii; 4490 PetscInt *indx; 4491 PetscScalar *values; 4492 4493 PetscFunctionBegin; 4494 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4495 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4496 PetscInt *dnz,*onz,sum,bs,cbs; 4497 4498 if (n == PETSC_DECIDE) { 4499 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4500 } 4501 /* Check sum(n) = N */ 4502 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4503 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local 
columns %D != global columns %D",sum,N); 4504 4505 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4506 rstart -= m; 4507 4508 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4509 for (i=0; i<m; i++) { 4510 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4511 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4512 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4513 } 4514 4515 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4516 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4517 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4518 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4519 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4520 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4521 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4522 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4523 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4524 } 4525 4526 /* numeric phase */ 4527 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4528 for (i=0; i<m; i++) { 4529 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4530 Ii = i + rstart; 4531 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4532 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4533 } 4534 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4535 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4536 PetscFunctionReturn(0); 4537 } 4538 4539 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4540 { 4541 PetscErrorCode ierr; 4542 PetscMPIInt rank; 4543 PetscInt m,N,i,rstart,nnz; 4544 size_t len; 4545 const PetscInt *indx; 4546 PetscViewer out; 4547 char *name; 4548 Mat B; 4549 const PetscScalar *values; 4550 4551 PetscFunctionBegin; 4552 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4553 
ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4554 /* Should this be the type of the diagonal block of A? */ 4555 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4556 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4557 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4558 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4559 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4560 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4561 for (i=0; i<m; i++) { 4562 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4563 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4564 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4565 } 4566 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4567 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4568 4569 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4570 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4571 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4572 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4573 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4574 ierr = PetscFree(name);CHKERRQ(ierr); 4575 ierr = MatView(B,out);CHKERRQ(ierr); 4576 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4577 ierr = MatDestroy(&B);CHKERRQ(ierr); 4578 PetscFunctionReturn(0); 4579 } 4580 4581 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4582 { 4583 PetscErrorCode ierr; 4584 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4585 4586 PetscFunctionBegin; 4587 if (!merge) PetscFunctionReturn(0); 4588 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4589 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4590 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4591 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4592 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4593 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4594 ierr = 
PetscFree(merge->buf_ri);CHKERRQ(ierr); 4595 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4596 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4597 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4598 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4599 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4600 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4601 ierr = PetscFree(merge);CHKERRQ(ierr); 4602 PetscFunctionReturn(0); 4603 } 4604 4605 #include <../src/mat/utils/freespace.h> 4606 #include <petscbt.h> 4607 4608 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4609 { 4610 PetscErrorCode ierr; 4611 MPI_Comm comm; 4612 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4613 PetscMPIInt size,rank,taga,*len_s; 4614 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4615 PetscInt proc,m; 4616 PetscInt **buf_ri,**buf_rj; 4617 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4618 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4619 MPI_Request *s_waits,*r_waits; 4620 MPI_Status *status; 4621 MatScalar *aa=a->a; 4622 MatScalar **abuf_r,*ba_i; 4623 Mat_Merge_SeqsToMPI *merge; 4624 PetscContainer container; 4625 4626 PetscFunctionBegin; 4627 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4628 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4629 4630 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4631 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4632 4633 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4634 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4635 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4636 4637 bi = merge->bi; 4638 bj = merge->bj; 4639 buf_ri = merge->buf_ri; 4640 buf_rj = merge->buf_rj; 4641 4642 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4643 owners = merge->rowmap->range; 4644 len_s = merge->len_s; 4645 4646 
  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  /* each destination rank gets the contiguous slice of aa holding the rows it owns */
  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  /* ba_i accumulates one (dense-in-structure) merged row; N is an upper bound on its length */
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;   /* global row number */
    bj_i = bj+bi[i];           /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    /* bj_i is a superset of each contributor's columns, both sorted, so a single
       forward scan (j over bj_i, nextaj over the contributor) merges the values */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];   /* NOTE: aa is re-pointed here; its initial value was consumed by the sends above */
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;  /* advance k-th message cursor past this row */
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of merging per-rank SeqAIJ matrices into one MPIAIJ matrix; builds the
   communication pattern and merged nonzero structure, and attaches the merge context
   consumed later by MatCreateMPIAIJSumSeqAIJNumeric() */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm
*/ 4748 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4749 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4750 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4751 4752 ierr = PetscNew(&merge);CHKERRQ(ierr); 4753 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4754 4755 /* determine row ownership */ 4756 /*---------------------------------------------------------*/ 4757 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4758 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4759 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4760 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4761 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4762 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4763 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4764 4765 m = merge->rowmap->n; 4766 owners = merge->rowmap->range; 4767 4768 /* determine the number of messages to send, their lengths */ 4769 /*---------------------------------------------------------*/ 4770 len_s = merge->len_s; 4771 4772 len = 0; /* length of buf_si[] */ 4773 merge->nsend = 0; 4774 for (proc=0; proc<size; proc++) { 4775 len_si[proc] = 0; 4776 if (proc == rank) { 4777 len_s[proc] = 0; 4778 } else { 4779 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4780 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4781 } 4782 if (len_s[proc]) { 4783 merge->nsend++; 4784 nrows = 0; 4785 for (i=owners[proc]; i<owners[proc+1]; i++) { 4786 if (ai[i+1] > ai[i]) nrows++; 4787 } 4788 len_si[proc] = 2*(nrows+1); 4789 len += len_si[proc]; 4790 } 4791 } 4792 4793 /* determine the number and length of messages to receive for ij-structure */ 4794 /*-------------------------------------------------------------------------*/ 4795 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4796 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4797 4798 /* post the Irecv of j-structure */ 4799 /*-------------------------------*/ 4800 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4801 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4802 4803 /* post the Isend of j-structure */ 4804 /*--------------------------------*/ 4805 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4806 4807 for (proc=0, k=0; proc<size; proc++) { 4808 if (!len_s[proc]) continue; 4809 i = owners[proc]; 4810 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4811 k++; 4812 } 4813 4814 /* receives and sends of j-structure are complete */ 4815 /*------------------------------------------------*/ 4816 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4817 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4818 4819 /* send and recv i-structure */ 4820 /*---------------------------*/ 4821 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4822 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4823 4824 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4825 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4826 for (proc=0,k=0; proc<size; proc++) { 4827 if (!len_s[proc]) continue; 4828 /* form outgoing message for i-structure: 4829 buf_si[0]: nrows to be sent 4830 [1:nrows]: row index (global) 4831 [nrows+1:2*nrows+1]: i-structure index 4832 */ 4833 /*-------------------------------------------*/ 4834 nrows = len_si[proc]/2 - 1; 4835 buf_si_i = buf_si + nrows+1; 4836 buf_si[0] = nrows; 4837 buf_si_i[0] = 0; 4838 nrows = 0; 4839 for (i=owners[proc]; i<owners[proc+1]; i++) { 4840 anzi = ai[i+1] - ai[i]; 4841 if (anzi) { 4842 buf_si_i[nrows+1] = 
buf_si_i[nrows] + anzi; /* i-structure */ 4843 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4844 nrows++; 4845 } 4846 } 4847 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4848 k++; 4849 buf_si += len_si[proc]; 4850 } 4851 4852 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4853 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4854 4855 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4856 for (i=0; i<merge->nrecv; i++) { 4857 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4858 } 4859 4860 ierr = PetscFree(len_si);CHKERRQ(ierr); 4861 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4862 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4863 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4864 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4865 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4866 ierr = PetscFree(status);CHKERRQ(ierr); 4867 4868 /* compute a local seq matrix in each processor */ 4869 /*----------------------------------------------*/ 4870 /* allocate bi array and free space for accumulating nonzero column info */ 4871 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4872 bi[0] = 0; 4873 4874 /* create and initialize a linked list */ 4875 nlnk = N+1; 4876 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4877 4878 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4879 len = ai[owners[rank+1]] - ai[owners[rank]]; 4880 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4881 4882 current_space = free_space; 4883 4884 /* determine symbolic info for each local row */ 4885 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4886 4887 for (k=0; k<merge->nrecv; k++) { 4888 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4889 
nrows = *buf_ri_k[k]; 4890 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4891 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4892 } 4893 4894 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4895 len = 0; 4896 for (i=0; i<m; i++) { 4897 bnzi = 0; 4898 /* add local non-zero cols of this proc's seqmat into lnk */ 4899 arow = owners[rank] + i; 4900 anzi = ai[arow+1] - ai[arow]; 4901 aj = a->j + ai[arow]; 4902 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4903 bnzi += nlnk; 4904 /* add received col data into lnk */ 4905 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4906 if (i == *nextrow[k]) { /* i-th row */ 4907 anzi = *(nextai[k]+1) - *nextai[k]; 4908 aj = buf_rj[k] + *nextai[k]; 4909 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4910 bnzi += nlnk; 4911 nextrow[k]++; nextai[k]++; 4912 } 4913 } 4914 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4915 4916 /* if free space is not available, make more free space */ 4917 if (current_space->local_remaining<bnzi) { 4918 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4919 nspacedouble++; 4920 } 4921 /* copy data into free space, then initialize lnk */ 4922 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4923 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4924 4925 current_space->array += bnzi; 4926 current_space->local_used += bnzi; 4927 current_space->local_remaining -= bnzi; 4928 4929 bi[i+1] = bi[i] + bnzi; 4930 } 4931 4932 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4933 4934 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4935 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4936 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4937 4938 /* create symbolic parallel matrix B_mpi */ 4939 
/*---------------------------------------*/ 4940 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4941 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4942 if (n==PETSC_DECIDE) { 4943 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4944 } else { 4945 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4946 } 4947 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4948 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4949 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4950 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4951 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4952 4953 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4954 B_mpi->assembled = PETSC_FALSE; 4955 merge->bi = bi; 4956 merge->bj = bj; 4957 merge->buf_ri = buf_ri; 4958 merge->buf_rj = buf_rj; 4959 merge->coi = NULL; 4960 merge->coj = NULL; 4961 merge->owners_co = NULL; 4962 4963 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4964 4965 /* attach the supporting struct to B_mpi for reuse */ 4966 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4967 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4968 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4969 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4970 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4971 *mpimat = B_mpi; 4972 4973 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4974 PetscFunctionReturn(0); 4975 } 4976 4977 /*@C 4978 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4979 matrices from each processor 4980 4981 Collective 4982 4983 Input Parameters: 4984 + comm - the communicators the parallel matrix will live on 4985 . seqmat - the input sequential matrices 4986 . 
m - number of local rows (or PETSC_DECIDE) 4987 . n - number of local columns (or PETSC_DECIDE) 4988 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4989 4990 Output Parameter: 4991 . mpimat - the parallel matrix generated 4992 4993 Level: advanced 4994 4995 Notes: 4996 The dimensions of the sequential matrix in each processor MUST be the same. 4997 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4998 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4999 @*/ 5000 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5001 { 5002 PetscErrorCode ierr; 5003 PetscMPIInt size; 5004 5005 PetscFunctionBegin; 5006 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5007 if (size == 1) { 5008 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5009 if (scall == MAT_INITIAL_MATRIX) { 5010 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5011 } else { 5012 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5013 } 5014 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5015 PetscFunctionReturn(0); 5016 } 5017 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5018 if (scall == MAT_INITIAL_MATRIX) { 5019 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5020 } 5021 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5022 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5023 PetscFunctionReturn(0); 5024 } 5025 5026 /*@ 5027 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5028 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5029 with MatGetSize() 5030 5031 Not Collective 5032 5033 Input Parameters: 5034 + A - the matrix 5035 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5036 5037 Output Parameter: 5038 . A_loc - the local sequential matrix generated 5039 5040 Level: developer 5041 5042 Notes: 5043 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5044 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5045 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5046 modify the values of the returned A_loc. 5047 5048 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5049 @*/ 5050 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5051 { 5052 PetscErrorCode ierr; 5053 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5054 Mat_SeqAIJ *mat,*a,*b; 5055 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5056 const PetscScalar *aa,*ba,*aav,*bav; 5057 PetscScalar *ca,*cam; 5058 PetscMPIInt size; 5059 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5060 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5061 PetscBool match; 5062 5063 PetscFunctionBegin; 5064 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5065 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5066 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5067 if (size == 1) { 5068 if (scall == MAT_INITIAL_MATRIX) { 5069 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5070 *A_loc = mpimat->A; 5071 } else if (scall == MAT_REUSE_MATRIX) { 5072 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5073 } 5074 
PetscFunctionReturn(0); 5075 } 5076 5077 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5078 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5079 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5080 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5081 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5082 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5083 aa = aav; 5084 ba = bav; 5085 if (scall == MAT_INITIAL_MATRIX) { 5086 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5087 ci[0] = 0; 5088 for (i=0; i<am; i++) { 5089 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5090 } 5091 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5092 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5093 k = 0; 5094 for (i=0; i<am; i++) { 5095 ncols_o = bi[i+1] - bi[i]; 5096 ncols_d = ai[i+1] - ai[i]; 5097 /* off-diagonal portion of A */ 5098 for (jo=0; jo<ncols_o; jo++) { 5099 col = cmap[*bj]; 5100 if (col >= cstart) break; 5101 cj[k] = col; bj++; 5102 ca[k++] = *ba++; 5103 } 5104 /* diagonal portion of A */ 5105 for (j=0; j<ncols_d; j++) { 5106 cj[k] = cstart + *aj++; 5107 ca[k++] = *aa++; 5108 } 5109 /* off-diagonal portion of A */ 5110 for (j=jo; j<ncols_o; j++) { 5111 cj[k] = cmap[*bj++]; 5112 ca[k++] = *ba++; 5113 } 5114 } 5115 /* put together the new matrix */ 5116 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5117 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5118 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5119 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5120 mat->free_a = PETSC_TRUE; 5121 mat->free_ij = PETSC_TRUE; 5122 mat->nonew = 0; 5123 } else if (scall == MAT_REUSE_MATRIX) { 5124 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5125 #if defined(PETSC_USE_DEVICE) 5126 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5127 #endif 5128 ci = mat->i; cj = mat->j; cam = mat->a; 5129 for (i=0; i<am; i++) { 5130 /* off-diagonal portion of A */ 5131 ncols_o = bi[i+1] - bi[i]; 5132 for (jo=0; jo<ncols_o; jo++) { 5133 col = cmap[*bj]; 5134 if (col >= cstart) break; 5135 *cam++ = *ba++; bj++; 5136 } 5137 /* diagonal portion of A */ 5138 ncols_d = ai[i+1] - ai[i]; 5139 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5140 /* off-diagonal portion of A */ 5141 for (j=jo; j<ncols_o; j++) { 5142 *cam++ = *ba++; bj++; 5143 } 5144 } 5145 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5146 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5147 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5148 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5149 PetscFunctionReturn(0); 5150 } 5151 5152 /*@ 5153 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5154 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5155 5156 Not Collective 5157 5158 Input Parameters: 5159 + A - the matrix 5160 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5161 5162 Output Parameters: 5163 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5164 - A_loc - the local sequential matrix generated 5165 5166 Level: developer 5167 5168 Notes: 5169 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5170 5171 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5172 5173 @*/ 5174 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5175 { 5176 PetscErrorCode ierr; 5177 Mat Ao,Ad; 5178 const PetscInt *cmap; 5179 PetscMPIInt size; 5180 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5181 5182 PetscFunctionBegin; 5183 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5184 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5185 if (size == 1) { 5186 if (scall == MAT_INITIAL_MATRIX) { 5187 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5188 *A_loc = Ad; 5189 } else if (scall == MAT_REUSE_MATRIX) { 5190 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5191 } 5192 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5193 PetscFunctionReturn(0); 5194 } 5195 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5196 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5197 if (f) { 5198 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5199 } else { 5200 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5201 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5202 Mat_SeqAIJ 
*c; 5203 PetscInt *ai = a->i, *aj = a->j; 5204 PetscInt *bi = b->i, *bj = b->j; 5205 PetscInt *ci,*cj; 5206 const PetscScalar *aa,*ba; 5207 PetscScalar *ca; 5208 PetscInt i,j,am,dn,on; 5209 5210 ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5211 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5212 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5213 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5214 if (scall == MAT_INITIAL_MATRIX) { 5215 PetscInt k; 5216 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5217 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5218 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5219 ci[0] = 0; 5220 for (i=0,k=0; i<am; i++) { 5221 const PetscInt ncols_o = bi[i+1] - bi[i]; 5222 const PetscInt ncols_d = ai[i+1] - ai[i]; 5223 ci[i+1] = ci[i] + ncols_o + ncols_d; 5224 /* diagonal portion of A */ 5225 for (j=0; j<ncols_d; j++,k++) { 5226 cj[k] = *aj++; 5227 ca[k] = *aa++; 5228 } 5229 /* off-diagonal portion of A */ 5230 for (j=0; j<ncols_o; j++,k++) { 5231 cj[k] = dn + *bj++; 5232 ca[k] = *ba++; 5233 } 5234 } 5235 /* put together the new matrix */ 5236 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5237 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5238 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5239 c = (Mat_SeqAIJ*)(*A_loc)->data; 5240 c->free_a = PETSC_TRUE; 5241 c->free_ij = PETSC_TRUE; 5242 c->nonew = 0; 5243 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5244 } else if (scall == MAT_REUSE_MATRIX) { 5245 #if defined(PETSC_HAVE_DEVICE) 5246 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5247 #endif 5248 c = (Mat_SeqAIJ*)(*A_loc)->data; 5249 ca = c->a; 5250 for (i=0; i<am; i++) { 5251 const PetscInt ncols_d = ai[i+1] - ai[i]; 5252 const PetscInt ncols_o = bi[i+1] - bi[i]; 5253 /* diagonal portion of A */ 5254 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5255 /* off-diagonal portion of A */ 5256 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5257 } 5258 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5259 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5260 ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr); 5261 if (glob) { 5262 PetscInt cst, *gidx; 5263 5264 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5265 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5266 for (i=0; i<dn; i++) gidx[i] = cst + i; 5267 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5268 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5269 } 5270 } 5271 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5272 PetscFunctionReturn(0); 5273 } 5274 5275 /*@C 5276 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5277 5278 Not Collective 5279 5280 Input Parameters: 5281 + A - the matrix 5282 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5283 - row, col - index sets of rows and columns to extract (or NULL) 5284 5285 Output Parameter: 5286 . 
A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    /* default rows: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default columns: global indices of the nonzero columns, in ascending order:
       off-diagonal columns below the diagonal block, the diagonal block, then the rest */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices. A whole row is extracted once its index is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt            owner;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscErrorCode         ierr;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  /* per-root row: interleaved (diag,offdiag) nonzero counts and running offsets */
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location for each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* build entry-level SF graphs: one leaf per nonzero to be fetched */
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data for saving memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix (shifted back below after the Bcast) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* We want P_oth store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  /* temporarily rewrite po->j to global indices in place; undone via the inverse map below */
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  nout = 0;
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ     *p_oth;
  Mat_SeqAIJ     *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  IS             rows,map;
  PetscHMapI     hamp;
  PetscInt       i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm       comm;
  PetscSF        sf,osf;
  PetscBool      has;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
    ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
    ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof; /* dof > 1 collapses MAIJ blocks onto one key */
      ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
      if (!has) {
        mapping[i] = count;
        ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
      } else {
        /* Current 'i' has the same value as the previous step */
        mapping[i] = count-1;
      }
    }
    ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
    ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
    if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
    ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
    off = 0;
    ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
    ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
    ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
    /* In case, the matrix was already created but users want to recreate the matrix */
    ierr = MatDestroy(P_oth);CHKERRQ(ierr);
    ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
    ierr = ISDestroy(&map);CHKERRQ(ierr);
    ierr = ISDestroy(&rows);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     * */
    ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
    ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
    if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A - the first matrix in mpiaij format
.    B - the second matrix in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Input/Output Parameters:
+    rowb - index sets of rows of B to extract (or NULL), modified on output
-    colb - index sets of columns of B to extract (or NULL), modified on output

   Output Parameter:
.
B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* The parallel column layout of A must match the parallel row layout of B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the list of global rows of B to fetch, in increasing order: off-diagonal
       columns of A below the diagonal block (garray is sorted), then the local
       (diagonal-block) columns, then the remaining off-diagonal columns */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    /* PETSC_OWN_POINTER: the IS takes ownership of idx */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatCreateSubMatrices() with MAT_REUSE_MATRIX expects an array of matrices */
    ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  /* Hand the index sets back to the caller for reuse, or destroy them */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5685 5686 Level: developer 5687 5688 */ 5689 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5690 { 5691 PetscErrorCode ierr; 5692 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5693 Mat_SeqAIJ *b_oth; 5694 VecScatter ctx; 5695 MPI_Comm comm; 5696 const PetscMPIInt *rprocs,*sprocs; 5697 const PetscInt *srow,*rstarts,*sstarts; 5698 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5699 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5700 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5701 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5702 PetscMPIInt size,tag,rank,nreqs; 5703 5704 PetscFunctionBegin; 5705 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5706 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5707 5708 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5709 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5710 } 5711 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5712 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5713 5714 if (size == 1) { 5715 startsj_s = NULL; 5716 bufa_ptr = NULL; 5717 *B_oth = NULL; 5718 PetscFunctionReturn(0); 5719 } 5720 5721 ctx = a->Mvctx; 5722 tag = ((PetscObject)ctx)->tag; 5723 5724 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5725 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5726 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5727 ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr); 5728 ierr = 
PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr); 5729 rwaits = reqs; 5730 swaits = reqs + nrecvs; 5731 5732 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5733 if (scall == MAT_INITIAL_MATRIX) { 5734 /* i-array */ 5735 /*---------*/ 5736 /* post receives */ 5737 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5738 for (i=0; i<nrecvs; i++) { 5739 rowlen = rvalues + rstarts[i]*rbs; 5740 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5741 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5742 } 5743 5744 /* pack the outgoing message */ 5745 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5746 5747 sstartsj[0] = 0; 5748 rstartsj[0] = 0; 5749 len = 0; /* total length of j or a array to be sent */ 5750 if (nsends) { 5751 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5752 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5753 } 5754 for (i=0; i<nsends; i++) { 5755 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5756 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5757 for (j=0; j<nrows; j++) { 5758 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5759 for (l=0; l<sbs; l++) { 5760 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5761 5762 rowlen[j*sbs+l] = ncols; 5763 5764 len += ncols; 5765 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5766 } 5767 k++; 5768 } 5769 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5770 5771 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5772 } 5773 /* recvs and sends of i-array are completed */ 5774 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5775 ierr = PetscFree(svalues);CHKERRQ(ierr); 5776 5777 /* allocate buffers 
for sending j and a arrays */ 5778 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5779 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5780 5781 /* create i-array of B_oth */ 5782 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5783 5784 b_othi[0] = 0; 5785 len = 0; /* total length of j or a array to be received */ 5786 k = 0; 5787 for (i=0; i<nrecvs; i++) { 5788 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5789 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5790 for (j=0; j<nrows; j++) { 5791 b_othi[k+1] = b_othi[k] + rowlen[j]; 5792 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5793 k++; 5794 } 5795 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5796 } 5797 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5798 5799 /* allocate space for j and a arrrays of B_oth */ 5800 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5801 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5802 5803 /* j-array */ 5804 /*---------*/ 5805 /* post receives of j-array */ 5806 for (i=0; i<nrecvs; i++) { 5807 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5808 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5809 } 5810 5811 /* pack the outgoing message j-array */ 5812 if (nsends) k = sstarts[0]; 5813 for (i=0; i<nsends; i++) { 5814 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5815 bufJ = bufj+sstartsj[i]; 5816 for (j=0; j<nrows; j++) { 5817 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5818 for (ll=0; ll<sbs; ll++) { 5819 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5820 for (l=0; l<ncols; l++) { 5821 *bufJ++ = cols[l]; 5822 } 5823 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5824 } 5825 } 5826 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5827 } 5828 5829 /* recvs and sends of j-array are 
completed */ 5830 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5831 } else if (scall == MAT_REUSE_MATRIX) { 5832 sstartsj = *startsj_s; 5833 rstartsj = *startsj_r; 5834 bufa = *bufa_ptr; 5835 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5836 b_otha = b_oth->a; 5837 #if defined(PETSC_HAVE_DEVICE) 5838 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5839 #endif 5840 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5841 5842 /* a-array */ 5843 /*---------*/ 5844 /* post receives of a-array */ 5845 for (i=0; i<nrecvs; i++) { 5846 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5847 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5848 } 5849 5850 /* pack the outgoing message a-array */ 5851 if (nsends) k = sstarts[0]; 5852 for (i=0; i<nsends; i++) { 5853 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5854 bufA = bufa+sstartsj[i]; 5855 for (j=0; j<nrows; j++) { 5856 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5857 for (ll=0; ll<sbs; ll++) { 5858 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5859 for (l=0; l<ncols; l++) { 5860 *bufA++ = vals[l]; 5861 } 5862 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5863 } 5864 } 5865 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5866 } 5867 /* recvs and sends of a-array are completed */ 5868 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5869 ierr = PetscFree(reqs);CHKERRQ(ierr); 5870 5871 if (scall == MAT_INITIAL_MATRIX) { 5872 /* put together the new matrix */ 5873 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5874 5875 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 5876 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5877 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5878 b_oth->free_a = PETSC_TRUE; 5879 b_oth->free_ij = PETSC_TRUE; 5880 b_oth->nonew = 0; 5881 5882 ierr = PetscFree(bufj);CHKERRQ(ierr); 5883 if (!startsj_s || !bufa_ptr) { 5884 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5885 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5886 } else { 5887 *startsj_s = sstartsj; 5888 *startsj_r = rstartsj; 5889 *bufa_ptr = bufa; 5890 } 5891 } 5892 5893 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5894 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5895 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5896 PetscFunctionReturn(0); 5897 } 5898 5899 /*@C 5900 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5901 5902 Not Collective 5903 5904 Input Parameter: 5905 . A - The matrix in mpiaij format 5906 5907 Output Parameters: 5908 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5909 . 
colmap - A map from global column index to local index into lvec 5910 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5911 5912 Level: developer 5913 5914 @*/ 5915 #if defined(PETSC_USE_CTABLE) 5916 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5917 #else 5918 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5919 #endif 5920 { 5921 Mat_MPIAIJ *a; 5922 5923 PetscFunctionBegin; 5924 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5925 PetscValidPointer(lvec, 2); 5926 PetscValidPointer(colmap, 3); 5927 PetscValidPointer(multScatter, 4); 5928 a = (Mat_MPIAIJ*) A->data; 5929 if (lvec) *lvec = a->lvec; 5930 if (colmap) *colmap = a->colmap; 5931 if (multScatter) *multScatter = a->Mvctx; 5932 PetscFunctionReturn(0); 5933 } 5934 5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5938 #if defined(PETSC_HAVE_MKL_SPARSE) 5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5940 #endif 5941 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5942 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5943 #if defined(PETSC_HAVE_ELEMENTAL) 5944 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5945 #endif 5946 #if defined(PETSC_HAVE_SCALAPACK) 5947 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5948 #endif 5949 #if defined(PETSC_HAVE_HYPRE) 5950 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5951 #endif 5952 #if defined(PETSC_HAVE_CUDA) 5953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5954 
#endif 5955 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5956 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5957 #endif 5958 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5959 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5960 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5961 5962 /* 5963 Computes (B'*A')' since computing B*A directly is untenable 5964 5965 n p p 5966 [ ] [ ] [ ] 5967 m [ A ] * n [ B ] = m [ C ] 5968 [ ] [ ] [ ] 5969 5970 */ 5971 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5972 { 5973 PetscErrorCode ierr; 5974 Mat At,Bt,Ct; 5975 5976 PetscFunctionBegin; 5977 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5978 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5979 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5980 ierr = MatDestroy(&At);CHKERRQ(ierr); 5981 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5982 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5983 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5984 PetscFunctionReturn(0); 5985 } 5986 5987 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5988 { 5989 PetscErrorCode ierr; 5990 PetscBool cisdense; 5991 5992 PetscFunctionBegin; 5993 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5994 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5995 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5996 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5997 if (!cisdense) { 5998 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5999 } 6000 ierr = MatSetUp(C);CHKERRQ(ierr); 6001 6002 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6003 PetscFunctionReturn(0); 6004 } 
6005 6006 /* ----------------------------------------------------------------*/ 6007 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6008 { 6009 Mat_Product *product = C->product; 6010 Mat A = product->A,B=product->B; 6011 6012 PetscFunctionBegin; 6013 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6014 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6015 6016 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6017 C->ops->productsymbolic = MatProductSymbolic_AB; 6018 PetscFunctionReturn(0); 6019 } 6020 6021 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6022 { 6023 PetscErrorCode ierr; 6024 Mat_Product *product = C->product; 6025 6026 PetscFunctionBegin; 6027 if (product->type == MATPRODUCT_AB) { 6028 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6029 } 6030 PetscFunctionReturn(0); 6031 } 6032 /* ----------------------------------------------------------------*/ 6033 6034 /*MC 6035 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6036 6037 Options Database Keys: 6038 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6039 6040 Level: beginner 6041 6042 Notes: 6043 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6044 in this case the values associated with the rows and columns one passes in are set to zero 6045 in the matrix 6046 6047 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: MatCreateAIJ()
M*/

/* Constructor for MATMPIAIJ: allocates the type-specific data, installs the
   function table, and registers the conversion and product routines that are
   looked up by string name via PetscObjectQueryFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);

  ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* core MPIAIJ operations */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* conversions to the various AIJ subtypes and other formats */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  /* matrix-product dispatch */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.
j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* Mark preallocated manually: the diagonal/off-diagonal blocks are created
     below directly from the user's arrays, bypassing the preallocation APIs */
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* Wrap the caller's arrays as the diagonal (A) and off-diagonal (B) blocks;
     arrays are NOT copied and remain owned by the caller */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  /* Assembly needs no communication since only local rows were provided */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
6214 #undef CHKERRQ 6215 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6216 #undef SETERRQ2 6217 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6218 #undef SETERRQ3 6219 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6220 #undef SETERRQ 6221 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6222 6223 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6224 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6225 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6226 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6227 #else 6228 #endif 6229 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6230 { 6231 Mat mat = *mmat; 6232 PetscInt m = *mm, n = *mn; 6233 InsertMode addv = *maddv; 6234 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6235 PetscScalar value; 6236 PetscErrorCode ierr; 6237 6238 MatCheckPreallocated(mat,1); 6239 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6240 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6241 { 6242 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6243 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6244 PetscBool roworiented = aij->roworiented; 6245 6246 /* Some Variables required in the macro */ 6247 Mat A = aij->A; 6248 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6249 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6250 MatScalar *aa = a->a; 6251 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 6252 Mat B = aij->B; 6253 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6254 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6255 MatScalar *ba = b->a; 6256 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6257 * cannot use "#if defined" inside a macro. */ 6258 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6259 6260 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6261 PetscInt nonew = a->nonew; 6262 MatScalar *ap1,*ap2; 6263 6264 PetscFunctionBegin; 6265 for (i=0; i<m; i++) { 6266 if (im[i] < 0) continue; 6267 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6268 if (im[i] >= rstart && im[i] < rend) { 6269 row = im[i] - rstart; 6270 lastcol1 = -1; 6271 rp1 = aj + ai[row]; 6272 ap1 = aa + ai[row]; 6273 rmax1 = aimax[row]; 6274 nrow1 = ailen[row]; 6275 low1 = 0; 6276 high1 = nrow1; 6277 lastcol2 = -1; 6278 rp2 = bj + bi[row]; 6279 ap2 = ba + bi[row]; 6280 rmax2 = bimax[row]; 6281 nrow2 = bilen[row]; 6282 low2 = 0; 6283 high2 = nrow2; 6284 6285 for (j=0; j<n; j++) { 6286 if (roworiented) value = v[i*n+j]; 6287 else value = v[i+j*m]; 6288 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6289 if (in[j] >= cstart && in[j] < cend) { 6290 col = in[j] - cstart; 6291 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6292 #if defined(PETSC_HAVE_DEVICE) 6293 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6294 #endif 6295 } else if (in[j] < 0) continue; 6296 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6297 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6298 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6299 } else { 6300 if (mat->was_assembled) { 6301 if (!aij->colmap) { 6302 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6303 } 6304 #if defined(PETSC_USE_CTABLE) 6305 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6306 col--; 6307 #else 6308 col = aij->colmap[in[j]] - 1; 6309 #endif 6310 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6311 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6312 col = in[j]; 6313 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6314 B = aij->B; 6315 b = (Mat_SeqAIJ*)B->data; 6316 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6317 rp2 = bj + bi[row]; 6318 ap2 = ba + bi[row]; 6319 rmax2 = bimax[row]; 6320 nrow2 = bilen[row]; 6321 low2 = 0; 6322 high2 = nrow2; 6323 bm = aij->B->rmap->n; 6324 ba = b->a; 6325 inserted = PETSC_FALSE; 6326 } 6327 } else col = in[j]; 6328 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6329 #if defined(PETSC_HAVE_DEVICE) 6330 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6331 #endif 6332 } 6333 } 6334 } else if (!aij->donotstash) { 6335 if (roworiented) { 6336 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6337 } else { 6338 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6339 } 6340 } 6341 } 6342 } 6343 PetscFunctionReturnVoid(); 6344 } 6345 6346 typedef struct { 6347 Mat *mp; /* intermediate products */ 6348 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6349 PetscInt cp; /* number of intermediate products */ 6350 6351 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6352 PetscInt *startsj_s,*startsj_r; 6353 PetscScalar *bufa; 6354 Mat P_oth; 6355 6356 /* may take advantage of merging product->B */ 6357 Mat Bloc; /* B-local by merging diag and off-diag */ 6358 6359 /* cusparse does not have support to split between symbolic and numeric phases. 6360 When api_user is true, we don't need to update the numerical values 6361 of the temporary storage */ 6362 PetscBool reusesym; 6363 6364 /* support for COO values insertion */ 6365 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6366 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6367 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6368 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6369 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6370 PetscMemType mtype; 6371 6372 /* customization */ 6373 PetscBool abmerge; 6374 PetscBool P_oth_bind; 6375 } MatMatMPIAIJBACKEND; 6376 6377 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6378 { 6379 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6380 PetscInt i; 6381 PetscErrorCode ierr; 6382 6383 PetscFunctionBegin; 6384 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6385 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6386 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6387 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6388 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6389 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6390 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6391 for (i = 0; i < mmdata->cp; i++) { 6392 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6393 } 6394 ierr = 
PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
  /* own[0]/off[0] anchor single big index buffers shared by the later slots,
     so only slot 0 is freed; PetscFree(NULL) is a no-op when they were never allocated */
  ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
  ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
  ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
  ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
  ierr = PetscFree(mmdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Copy (a subset of) the nonzero values of SeqAIJ matrix A into v[]:
   if idx is given, v[k] = a[idx[k]] for k < n (gather); otherwise the first n
   values are copied contiguously.  A type-specific implementation is used when
   the matrix provides one (e.g. a device backend), queried via composed function
   "MatSeqAIJCopySubArray_C". */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
  } else {
    const PetscScalar *vv;

    ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt       j;

      /* indexed gather */
      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* no index set: plain contiguous copy of the first n values */
      ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
    }
    ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Numeric phase of the backend MatProduct: recompute the intermediate local
   products built by MatProductSymbolic_MPIAIJBACKEND(), gather their values
   into the COO buffers (scattering off-process contributions through the SF),
   and insert them into C with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    }
    if (mmdata->Bloc) {
      ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
    }
  }
  /* reusesym only skips the very first numeric call after an api_user symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  for (i
= 0; i < mmdata->cp; i++) {
    if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
  }
  /* gather the values of the (non-temporary) intermediate products into the COO buffers:
     coo_v collects locally-inserted values, coo_w collects values destined for other ranks */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; /* #values mp[i] sends off-process */

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; /* #values mp[i] inserts locally */

      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      /* all of mp[i]'s nonzeros are local: copy them contiguously */
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
  }
  ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4

/* Symbolic phase of the backend MatProduct: decompose the requested parallel
   product into at most MAX_NUMBER_INTERMEDIATE local (SeqAIJ) products, then
   build the COO index maps that route each intermediate's nonzeros (possibly
   across processes, via an SF) into C, and preallocate C with that COO pattern. */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a,*p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob = NULL;
  const char             *prefix;
  char                   pprefix[256];
  const PetscInt         *globidx,*P_oth_idx;
  PetscInt               i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                        /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                        /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType         ptype;
  PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
  PetscMPIInt            size;
  PetscErrorCode         ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ptype = product->type;
  /* for a symmetric A, At*B is the same product as A*B, which needs no off-process insertion */
  if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
  switch (ptype) {
  case MATPRODUCT_AB:
    A = product->A;
    P = product->B;
    m = A->rmap->n;
    n = P->cmap->n;
    M = A->rmap->N;
    N = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P = product->A;
    A = product->B;
    m = P->cmap->n;
    n = A->cmap->n;
    M = P->cmap->N;
    N = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A = product->A;
    P = product->B;
    m = P->cmap->n;
    n = P->cmap->n;
    M = P->cmap->N;
    N = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: everything is local */

  /* defaults */
  for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  ierr = PetscNew(&mmdata);CHKERRQ(ierr);
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
  }
  a = (Mat_MPIAIJ*)A->data;
  p = (Mat_MPIAIJ*)P->data;
  ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
  ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);

  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
      ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 1;       /* rows are C's own rows, offset by rstart */
      cmapt[cp] = 2;       /* columns need the merged local-to-global table */
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 1;       /* columns are C's own columns, offset by cstart */
      mptmp[cp] = PETSC_FALSE;
      cp++;
      ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray; /* off-diag columns map through P's garray */
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
      ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
      ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
      ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
      ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = p->garray; /* rows of P_off^t live on other processes */
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    /* P is product->B */
    ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
    ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
    ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
    ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
    ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
    ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
    ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
    mp[cp]->product->api_user = product->api_user;
    ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
    if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
    ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
    ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
      ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
      ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
      /* mp[1] = A_off * P_oth is only an input for mp[2]; flagged temporary below */
      ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      mptmp[cp] = PETSC_TRUE;
      cp++;
      ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);

  ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp = cp;
  C->product->data = mmdata;
  C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
  //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt mr = mp[cp]->rmap->n;
      const PetscInt rs = C->rmap->rstart;
      const PetscInt re = C->rmap->rend;
      const PetscInt *ii  = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz; /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
  */
  ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
  ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2,*coo_i2,*coo_j2;

    ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
    ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
    ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii  = mm->i;
        PetscInt       *coi = coo_i + ncoo_o;
        PetscInt       *coj = coo_j + ncoo_o;
        const PetscInt mr = mp[cp]->rmap->n;
        const PetscInt rs = C->rmap->rstart;
        const PetscInt re = C->rmap->rend;
        const PetscInt cs = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt gr  = rmap[i];
          const PetscInt nz  = ii[i+1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i+1]; j++) {
              *coi++ = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
    ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
    ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);

    /* still create an (empty) SF so PetscSFMalloc/PetscSFFree have a valid object */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
    PetscInt       *coi = coo_i + ncoo_d;
    PetscInt       *coj = coo_j + ncoo_d;
    const PetscInt *jj  = mm->j;
    const PetscInt *ii  = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt mr = mp[cp]->rmap->n;
    const PetscInt rs = C->rmap->rstart;
    const PetscInt re = C->rmap->rend;
    const PetscInt cs = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt gr  = rmap[i];
        const PetscInt nz  = ii[i+1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp]
== 1) { /* type-1, local to global for owned (consecutive) columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz; /* off-process rows are skipped here; they were counted for the SF exchange above */
        }
      }
    }
  }
  /* done with the global column index view of P (if any) */
  if (glob) {
    ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&glob);CHKERRQ(ierr);
  if (P_oth_l2g) {
    ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
  }
  ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc;
     PetscSFMalloc places it in the memory type (host/device) recorded in mmdata->mtype */
  ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);

  /* preallocate with COO data */
  ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
  ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatProductSetFromOptions_MPIAIJBACKEND - selects the symbolic routine for AB, AtB and PtAP
   products of device-backend MPIAIJ matrices (e.g. CUSPARSE), with a runtime option to force
   the CPU code path.

   Logic:
   - Without PETSC_HAVE_DEVICE, the backend symbolic routine is always selected (match starts TRUE).
   - With PETSC_HAVE_DEVICE, the backend is used only when A and B share the same matrix type and
     neither is bound to the CPU; the user may still opt out per product kind with the
     -mat*_backend_cpu options below.
   - If no backend routine is installed, control falls back to the generic MPIAIJ implementation.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product    *product = mat->product;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool      match  = PETSC_FALSE;
  PetscBool      usecpu = PETSC_FALSE;
#else
  PetscBool      match  = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  /* use the device backend only when both operands stay on the device and have the same type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
  }
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name depends on how the product was created: the user-facing API
       (MatMatMult etc., api_user true) or the MatProduct interface */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    default: /* other product types are handled by the MPIAIJ fallback below */
      break;
    }
    match = (PetscBool)!usecpu; /* user asked for the CPU path: disable the backend */
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) {
    ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}