1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 22 enough exist. 23 24 Level: beginner 25 26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 27 M*/ 28 29 /*MC 30 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 31 32 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 33 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 34 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 35 for communicators controlling multiple processes. It is recommended that you call both of 36 the above preallocation routines for simplicity. 37 38 Options Database Keys: 39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 40 41 Level: beginner 42 43 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 44 M*/ 45 46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 51 PetscFunctionBegin; 52 if (mat->A) { 53 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 54 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 55 } 56 PetscFunctionReturn(0); 57 } 58 59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 60 { 61 PetscErrorCode ierr; 62 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 63 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 64 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 65 const PetscInt *ia,*ib; 66 const MatScalar *aa,*bb; 67 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 68 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 69 70 PetscFunctionBegin; 71 *keptrows = 0; 72 ia = a->i; 73 ib = b->i; 74 for (i=0; i<m; i++) { 75 na = ia[i+1] - ia[i]; 76 nb = ib[i+1] - ib[i]; 77 if (!na && !nb) { 78 cnt++; 79 goto ok1; 80 } 81 aa = a->a + ia[i]; 82 for (j=0; j<na; j++) { 83 if (aa[j] != 0.0) goto ok1; 84 } 85 bb = b->a + ib[i]; 86 for (j=0; j <nb; j++) { 87 if (bb[j] != 0.0) goto ok1; 88 } 89 cnt++; 90 ok1:; 91 } 92 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 93 if (!n0rows) PetscFunctionReturn(0); 94 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 95 cnt = 0; 96 for (i=0; i<m; i++) { 97 na = ia[i+1] - ia[i]; 98 nb = ib[i+1] - ib[i]; 99 if (!na && !nb) continue; 100 aa = a->a + ia[i]; 101 for (j=0; j<na;j++) { 102 if (aa[j] != 0.0) { 103 rows[cnt++] = rstart + i; 104 goto ok2; 105 } 106 } 107 bb = b->a + ib[i]; 108 for (j=0; j<nb; j++) { 109 if (bb[j] != 0.0) { 110 rows[cnt++] = rstart + i; 111 goto ok2; 112 } 113 } 114 ok2:; 115 } 116 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 121 { 122 PetscErrorCode ierr; 123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 124 125 PetscFunctionBegin; 126 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 127 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 128 } else { 129 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 130 } 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 135 { 136 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 137 PetscErrorCode ierr; 138 PetscInt i,rstart,nrows,*rows; 139 140 PetscFunctionBegin; 141 *zrows = NULL; 142 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 143 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 144 for (i=0; i<nrows; i++) rows[i] += rstart; 145 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 150 { 151 PetscErrorCode ierr; 152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 153 PetscInt i,n,*garray = aij->garray; 154 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 155 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 156 PetscReal *work; 157 158 PetscFunctionBegin; 159 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 160 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 161 if (type == NORM_2) { 162 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 163 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 164 } 165 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 166 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 167 } 168 } else if (type == NORM_1) { 169 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 170 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 171 } 172 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 173 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 174 } 175 } else if (type == NORM_INFINITY) { 176 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 177 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 178 } 179 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 180 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 181 } 182 183 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 184 if (type == NORM_INFINITY) { 185 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 186 } else { 187 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 188 } 189 ierr = PetscFree(work);CHKERRQ(ierr); 190 if (type == NORM_2) { 191 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 197 { 198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 199 IS sis,gis; 200 PetscErrorCode ierr; 201 const PetscInt *isis,*igis; 202 PetscInt n,*iis,nsis,ngis,rstart,i; 203 204 PetscFunctionBegin; 205 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 206 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 207 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 208 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 209 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 210 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 211 212 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 213 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 214 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 215 n = ngis + nsis; 216 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 217 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 218 for (i=0; i<n; i++) iis[i] += rstart; 219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 220 221 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 222 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 223 ierr = ISDestroy(&sis);CHKERRQ(ierr); 224 ierr = ISDestroy(&gis);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 /* 229 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 230 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 231 232 Only for square matrices 233 234 Used by a preconditioner, hence PETSC_EXTERN 235 */ 236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 237 { 238 PetscMPIInt rank,size; 239 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 240 PetscErrorCode ierr; 241 Mat mat; 242 Mat_SeqAIJ *gmata; 243 PetscMPIInt tag; 244 MPI_Status status; 245 PetscBool aij; 246 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 247 248 PetscFunctionBegin; 249 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 250 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 251 if (!rank) { 252 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 253 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 254 } 255 if (reuse == MAT_INITIAL_MATRIX) { 256 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 257 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 258 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 259 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 260 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 261 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 262 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 263 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 264 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 265 266 rowners[0] = 0; 267 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 268 rstart = rowners[rank]; 269 rend = rowners[rank+1]; 270 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 271 if (!rank) { 272 gmata = (Mat_SeqAIJ*) gmat->data; 273 /* send row lengths to all processors */ 274 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 275 for (i=1; i<size; i++) { 276 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 277 } 278 /* determine number diagonal and off-diagonal counts */ 279 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 280 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 281 jj = 0; 282 for (i=0; i<m; i++) { 283 for (j=0; j<dlens[i]; j++) { 284 if (gmata->j[jj] < rstart) ld[i]++; 285 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 286 jj++; 287 } 288 } 289 /* send column indices to other processes */ 290 for (i=1; i<size; i++) { 291 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 292 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 294 } 295 296 /* send numerical values to other processes */ 297 for (i=1; i<size; i++) { 298 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 299 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 300 } 301 gmataa = gmata->a; 302 gmataj = gmata->j; 303 304 } else { 305 /* receive row lengths */ 306 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 307 /* receive column indices */ 308 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 309 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 310 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* determine number diagonal and off-diagonal counts */ 312 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 313 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 314 jj = 0; 315 for (i=0; i<m; i++) { 316 for (j=0; j<dlens[i]; j++) { 317 if (gmataj[jj] < rstart) ld[i]++; 318 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 319 jj++; 320 } 321 } 322 /* receive numerical values */ 323 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 324 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 325 } 326 /* set preallocation */ 327 for (i=0; i<m; i++) { 328 dlens[i] -= olens[i]; 329 } 330 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 331 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 332 333 for (i=0; i<m; i++) { 334 dlens[i] += olens[i]; 335 } 336 cnt = 0; 337 for (i=0; i<m; i++) { 338 row = rstart + i; 339 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 340 cnt += dlens[i]; 341 } 342 if (rank) { 343 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 344 } 345 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 346 ierr = PetscFree(rowners);CHKERRQ(ierr); 347 348 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 349 350 *inmat = mat; 351 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 352 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 353 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 354 mat = *inmat; 355 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 356 if (!rank) { 357 /* send numerical values to other processes */ 358 gmata = (Mat_SeqAIJ*) gmat->data; 359 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 360 gmataa = gmata->a; 361 for (i=1; i<size; i++) { 362 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 363 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 364 } 365 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 366 } else { 367 /* receive numerical values from process 0*/ 368 nz = Ad->nz + Ao->nz; 369 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 370 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 371 } 372 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 373 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 374 ad = Ad->a; 375 ao = Ao->a; 376 if (mat->rmap->n) { 377 i = 0; 378 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 379 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 380 } 381 for (i=1; i<mat->rmap->n; i++) { 382 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 i--; 386 if (mat->rmap->n) { 387 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 388 } 389 if (rank) { 390 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 391 } 392 } 393 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 /* 399 Local utility routine that creates a mapping from the global column 400 number to the local number in the off-diagonal part of the local 401 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 402 a slightly higher hash table cost; without it it is not scalable (each processor 403 has an order N integer array but is fast to acess. 404 */ 405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 406 { 407 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscInt n = aij->B->cmap->n,i; 410 411 PetscFunctionBegin; 412 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 413 #if defined(PETSC_USE_CTABLE) 414 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 415 for (i=0; i<n; i++) { 416 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 417 } 418 #else 419 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 420 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 421 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 422 #endif 423 PetscFunctionReturn(0); 424 } 425 426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 427 { \ 428 if (col <= lastcol1) low1 = 0; \ 429 else high1 = nrow1; \ 430 lastcol1 = col;\ 431 while (high1-low1 > 5) { \ 432 t = (low1+high1)/2; \ 433 if (rp1[t] > col) high1 = t; \ 434 else low1 = t; \ 435 } \ 436 for (_i=low1; _i<high1; _i++) { \ 437 if (rp1[_i] > col) break; \ 438 if (rp1[_i] == col) { \ 439 if (addv == ADD_VALUES) ap1[_i] += value; \ 440 else ap1[_i] = value; \ 441 goto a_noinsert; \ 442 } \ 443 } \ 444 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 445 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 446 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 447 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 448 N = nrow1++ - 1; a->nz++; high1++; \ 449 /* shift up all the later entries in this row */ \ 450 for (ii=N; ii>=_i; ii--) { \ 451 rp1[ii+1] = rp1[ii]; \ 452 ap1[ii+1] = ap1[ii]; \ 453 } \ 454 rp1[_i] = col; \ 455 ap1[_i] = value; \ 456 A->nonzerostate++;\ 457 a_noinsert: ; \ 458 ailen[row] = nrow1; \ 459 } 460 461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 462 { \ 463 if (col <= lastcol2) low2 = 0; \ 464 else high2 = nrow2; \ 465 lastcol2 = col; \ 466 while (high2-low2 > 5) { \ 467 t = (low2+high2)/2; \ 468 if (rp2[t] > col) high2 = t; \ 469 else low2 = t; \ 470 } \ 471 for (_i=low2; _i<high2; _i++) { \ 472 if (rp2[_i] > col) break; \ 473 if (rp2[_i] == col) { \ 474 if (addv == ADD_VALUES) ap2[_i] += value; \ 475 else ap2[_i] = value; \ 476 goto b_noinsert; \ 477 } \ 478 } \ 479 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 480 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 481 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 482 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 483 N = nrow2++ - 1; b->nz++; high2++; \ 484 /* shift up all the later entries in this row */ \ 485 for (ii=N; ii>=_i; ii--) { \ 486 rp2[ii+1] = rp2[ii]; \ 487 ap2[ii+1] = ap2[ii]; \ 488 } \ 489 rp2[_i] = col; \ 490 ap2[_i] = value; \ 491 B->nonzerostate++; \ 492 b_noinsert: ; \ 493 bilen[row] = nrow2; \ 494 } 495 496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 497 { 498 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 499 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 500 PetscErrorCode ierr; 501 PetscInt l,*garray = mat->garray,diag; 502 503 PetscFunctionBegin; 504 /* code only works for square matrices A */ 505 506 /* find size of row to the left of the diagonal part */ 507 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 508 row = row - diag; 509 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 510 if (garray[b->j[b->i[row]+l]] > diag) break; 511 } 512 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 513 514 /* diagonal part */ 515 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* right of diagonal part */ 518 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 523 { 524 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 525 PetscScalar value; 526 PetscErrorCode ierr; 527 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 528 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 529 PetscBool roworiented = aij->roworiented; 530 531 /* Some Variables required in the macro */ 532 Mat A = aij->A; 533 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 534 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 535 MatScalar *aa = a->a; 536 PetscBool ignorezeroentries = a->ignorezeroentries; 537 Mat B = aij->B; 538 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 539 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 540 MatScalar *ba = b->a; 541 542 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 543 PetscInt nonew; 544 MatScalar *ap1,*ap2; 545 546 PetscFunctionBegin; 547 for (i=0; i<m; i++) { 548 if (im[i] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 551 #endif 552 if (im[i] >= rstart && im[i] < rend) { 553 row = im[i] - rstart; 554 lastcol1 = -1; 555 rp1 = aj + ai[row]; 556 ap1 = aa + ai[row]; 557 rmax1 = aimax[row]; 558 nrow1 = ailen[row]; 559 low1 = 0; 560 high1 = nrow1; 561 lastcol2 = -1; 562 rp2 = bj + bi[row]; 563 ap2 = ba + bi[row]; 564 rmax2 = bimax[row]; 565 nrow2 = bilen[row]; 566 low2 = 0; 567 high2 = nrow2; 568 569 for (j=0; j<n; j++) { 570 if (roworiented) value = v[i*n+j]; 571 else value = v[i+j*m]; 572 if (in[j] >= cstart && in[j] < cend) { 573 col = in[j] - cstart; 574 nonew = a->nonew; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 576 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 577 } else if (in[j] < 0) continue; 578 #if defined(PETSC_USE_DEBUG) 579 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 580 #endif 581 else { 582 if (mat->was_assembled) { 583 if (!aij->colmap) { 584 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 585 } 586 #if defined(PETSC_USE_CTABLE) 587 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 593 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ*)B->data; 598 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 599 rp2 = bj + bi[row]; 600 ap2 = ba + bi[row]; 601 rmax2 = bimax[row]; 602 nrow2 = bilen[row]; 603 low2 = 0; 604 high2 = nrow2; 605 bm = aij->B->rmap->n; 606 ba = b->a; 607 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 608 } else col = in[j]; 609 nonew = b->nonew; 610 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 611 } 612 } 613 } else { 614 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 615 if (!aij->donotstash) { 616 mat->assembled = PETSC_FALSE; 617 if (roworiented) { 618 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 619 } else { 620 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 621 } 622 } 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 638 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= rstart && idxm[i] < rend) { 640 row = idxm[i] - rstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 643 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= cstart && idxn[j] < cend) { 645 col = idxn[j] - cstart; 646 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } else { 648 if (!aij->colmap) { 649 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 653 col--; 654 #else 655 col = aij->colmap[idxn[j]] - 1; 656 #endif 657 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 658 else { 659 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 660 } 661 } 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 669 670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 673 PetscErrorCode ierr; 674 PetscInt nstash,reallocs; 675 676 PetscFunctionBegin; 677 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 678 679 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 680 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 681 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 689 PetscErrorCode ierr; 690 PetscMPIInt n; 691 PetscInt i,j,rstart,ncols,flg; 692 PetscInt *row,*col; 693 PetscBool other_disassembled; 694 PetscScalar *val; 695 696 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 697 698 PetscFunctionBegin; 699 if (!aij->donotstash && !mat->nooffprocentries) { 700 while (1) { 701 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 702 if (!flg) break; 703 704 for (i=0; i<n; ) { 705 /* Now identify the consecutive vals belonging to the same row */ 706 for (j=i,rstart=row[j]; j<n; j++) { 707 if (row[j] != rstart) break; 708 } 709 if (j < n) ncols = j-i; 710 else ncols = n-i; 711 /* Now assemble all these values with a single function call */ 712 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 713 714 i = j; 715 } 716 } 717 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 718 } 719 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 720 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 721 722 /* determine if any processor has disassembled, if so we must 723 also disassemble ourselfs, in order that we may reassemble. */ 724 /* 725 if nonzero structure of submatrix B cannot change then we know that 726 no processor disassembled thus we can skip this stuff 727 */ 728 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 729 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 730 if (mat->was_assembled && !other_disassembled) { 731 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 732 } 733 } 734 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 735 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 736 } 737 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 738 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 739 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 740 741 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 742 743 aij->rowvalues = 0; 744 745 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 746 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 747 748 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 749 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 750 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 751 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 752 } 753 PetscFunctionReturn(0); 754 } 755 756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 757 { 758 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 759 PetscErrorCode ierr; 760 761 PetscFunctionBegin; 762 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 763 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 768 { 769 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 770 PetscInt *lrows; 771 PetscInt r, len; 772 PetscErrorCode ierr; 773 774 PetscFunctionBegin; 775 /* get locally owned rows */ 776 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 777 /* fix right hand side if needed */ 778 if (x && b) { 779 const PetscScalar *xx; 780 PetscScalar *bb; 781 782 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 783 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 784 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 785 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 786 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 787 } 788 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 789 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 790 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 791 PetscBool cong; 792 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 793 if (cong) A->congruentlayouts = 1; 794 else A->congruentlayouts = 0; 795 } 796 if ((diag != 0.0) && A->congruentlayouts) { 797 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 798 } else if (diag != 0.0) { 799 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 800 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 801 for (r = 0; r < len; ++r) { 802 const PetscInt row = lrows[r] + A->rmap->rstart; 803 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 804 } 805 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 } else { 808 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } 810 ierr = PetscFree(lrows);CHKERRQ(ierr); 811 812 /* only change matrix nonzero state if pattern was allowed to be changed */ 813 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 814 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 815 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 821 { 822 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 823 PetscErrorCode ierr; 824 PetscMPIInt n = A->rmap->n; 825 PetscInt i,j,r,m,p = 0,len = 0; 826 PetscInt *lrows,*owners = A->rmap->range; 827 PetscSFNode *rrows; 828 PetscSF sf; 829 const PetscScalar *xx; 830 PetscScalar *bb,*mask; 831 Vec xmask,lmask; 832 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 833 const PetscInt *aj, *ii,*ridx; 834 PetscScalar *aa; 835 836 PetscFunctionBegin; 837 /* Create SF where leaves are input rows and roots are owned rows */ 838 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 839 for (r = 0; r < n; ++r) lrows[r] = -1; 840 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 841 for (r = 0; r < N; ++r) { 842 const PetscInt idx = rows[r]; 843 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 844 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 845 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 846 } 847 rrows[r].rank = p; 848 rrows[r].index = rows[r] - owners[p]; 849 } 850 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 851 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 852 /* Collect flags for rows to be zeroed */ 853 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 854 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 856 /* Compress and put in row numbers */ 857 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 858 /* zero diagonal part of matrix */ 859 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 860 /* handle off diagonal part of matrix */ 861 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 862 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 863 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 864 for (i=0; i<len; i++) bb[lrows[i]] = 1; 865 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 866 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 869 if (x) { 870 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 873 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 874 } 875 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 876 /* remove zeroed rows of off diagonal matrix */ 877 ii = aij->i; 878 for (i=0; i<len; i++) { 879 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 /* loop over all elements of off process part of matrix zeroing removed columns*/ 882 if (aij->compressedrow.use) { 883 m = aij->compressedrow.nrows; 884 ii = aij->compressedrow.i; 885 ridx = aij->compressedrow.rindex; 886 for (i=0; i<m; i++) { 887 n = ii[i+1] - ii[i]; 888 aj = aij->j + ii[i]; 889 aa = aij->a + ii[i]; 890 891 for (j=0; j<n; j++) { 892 if (PetscAbsScalar(mask[*aj])) { 893 if (b) bb[*ridx] -= *aa*xx[*aj]; 894 *aa = 0.0; 895 } 896 aa++; 897 aj++; 898 } 899 ridx++; 900 } 901 } else { /* do not use compressed row format */ 902 m = l->B->rmap->n; 903 for (i=0; i<m; i++) { 904 n = ii[i+1] - ii[i]; 905 aj = aij->j + ii[i]; 906 aa = aij->a + ii[i]; 907 for (j=0; j<n; j++) { 908 if (PetscAbsScalar(mask[*aj])) { 909 if (b) bb[i] -= *aa*xx[*aj]; 910 *aa = 0.0; 911 } 912 aa++; 913 aj++; 914 } 915 } 916 } 917 if (x) { 918 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 919 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 920 } 921 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 922 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 923 ierr = PetscFree(lrows);CHKERRQ(ierr); 924 925 /* only change matrix nonzero state if pattern was allowed to be changed */ 926 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 927 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 928 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 929 } 930 PetscFunctionReturn(0); 931 } 932 933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 934 { 935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt nt; 938 VecScatter Mvctx = a->Mvctx; 939 940 PetscFunctionBegin; 941 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 942 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 943 944 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 946 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 947 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 948 PetscFunctionReturn(0); 949 } 950 951 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 952 { 953 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 962 { 963 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 964 PetscErrorCode ierr; 965 VecScatter Mvctx = a->Mvctx; 966 967 PetscFunctionBegin; 968 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 969 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 970 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 971 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 977 { 978 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 979 PetscErrorCode ierr; 980 PetscBool merged; 981 982 PetscFunctionBegin; 983 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 984 /* do nondiagonal part */ 985 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 986 if (!merged) { 987 /* send it on its way */ 988 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 989 /* do local part */ 990 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 991 /* receive remote parts: note this assumes the values are not actually */ 992 /* added in yy until the next line, */ 993 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 994 } else { 995 /* do local part */ 996 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 997 /* send it on its way */ 998 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 999 /* values actually were received in the Begin() but we need to call this nop */ 1000 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1001 } 1002 PetscFunctionReturn(0); 1003 } 1004 1005 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1006 { 1007 MPI_Comm comm; 1008 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1009 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1010 IS Me,Notme; 1011 PetscErrorCode ierr; 1012 PetscInt M,N,first,last,*notme,i; 1013 PetscMPIInt size; 1014 1015 PetscFunctionBegin; 1016 /* Easy test: symmetric diagonal block */ 1017 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1018 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1019 if (!*f) PetscFunctionReturn(0); 1020 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1021 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1022 if (size == 1) PetscFunctionReturn(0); 1023 1024 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1025 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1026 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1027 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1028 for (i=0; i<first; i++) notme[i] = i; 1029 for (i=last; i<M; i++) notme[i-last+first] = i; 1030 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1031 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1032 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1033 Aoff = Aoffs[0]; 1034 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1035 Boff = Boffs[0]; 1036 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1037 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1038 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1039 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1040 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1041 ierr = PetscFree(notme);CHKERRQ(ierr); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1046 { 1047 PetscErrorCode ierr; 1048 1049 PetscFunctionBegin; 1050 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1051 PetscFunctionReturn(0); 1052 } 1053 1054 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1055 { 1056 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 /* do nondiagonal part */ 1061 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1062 /* send it on its way */ 1063 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1064 /* do local part */ 1065 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1066 /* receive remote parts */ 1067 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1068 PetscFunctionReturn(0); 1069 } 1070 1071 /* 1072 This only works correctly for square matrices where the subblock A->A is the 1073 diagonal block 1074 */ 1075 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1076 { 1077 PetscErrorCode ierr; 1078 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1079 1080 PetscFunctionBegin; 1081 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1082 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1083 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1088 { 1089 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 1092 PetscFunctionBegin; 1093 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1094 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1099 { 1100 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 #if defined(PETSC_USE_LOG) 1105 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1106 #endif 1107 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1108 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1109 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1110 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1111 #if defined(PETSC_USE_CTABLE) 1112 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1113 #else 1114 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1115 #endif 1116 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1117 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1118 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1119 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1120 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1121 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1122 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1123 1124 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1125 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1133 #if defined(PETSC_HAVE_ELEMENTAL) 1134 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1135 #endif 1136 #if defined(PETSC_HAVE_HYPRE) 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1139 #endif 1140 PetscFunctionReturn(0); 1141 } 1142 1143 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1144 { 1145 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1146 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1147 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1148 PetscErrorCode ierr; 1149 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1150 int fd; 1151 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1152 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1153 PetscScalar *column_values; 1154 PetscInt message_count,flowcontrolcount; 1155 FILE *file; 1156 1157 PetscFunctionBegin; 1158 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1159 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1160 nz = A->nz + B->nz; 1161 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1162 if (!rank) { 1163 header[0] = MAT_FILE_CLASSID; 1164 header[1] = mat->rmap->N; 1165 header[2] = mat->cmap->N; 1166 1167 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1168 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1169 /* get largest number of rows any processor has */ 1170 rlen = mat->rmap->n; 1171 range = mat->rmap->range; 1172 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1173 } else { 1174 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1175 rlen = mat->rmap->n; 1176 } 1177 1178 /* load up the local row counts */ 1179 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1180 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1181 1182 /* store the row lengths to the file */ 1183 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1184 if (!rank) { 1185 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1186 for (i=1; i<size; i++) { 1187 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1188 rlen = range[i+1] - range[i]; 1189 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1190 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1191 } 1192 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1193 } else { 1194 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1195 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1196 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1197 } 1198 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1199 1200 /* load up the local column indices */ 1201 nzmax = nz; /* th processor needs space a largest processor needs */ 1202 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1203 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1204 cnt = 0; 1205 for (i=0; i<mat->rmap->n; i++) { 1206 for (j=B->i[i]; j<B->i[i+1]; j++) { 1207 if ((col = garray[B->j[j]]) > cstart) break; 1208 column_indices[cnt++] = col; 1209 } 1210 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1211 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1212 } 1213 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1214 1215 /* store the column indices to the file */ 1216 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1217 if (!rank) { 1218 MPI_Status status; 1219 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1220 for (i=1; i<size; i++) { 1221 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1222 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1223 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1224 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1225 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1226 } 1227 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1228 } else { 1229 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1230 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1231 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1232 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1233 } 1234 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1235 1236 /* load up the local column values */ 1237 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1238 cnt = 0; 1239 for (i=0; i<mat->rmap->n; i++) { 1240 for (j=B->i[i]; j<B->i[i+1]; j++) { 1241 if (garray[B->j[j]] > cstart) break; 1242 column_values[cnt++] = B->a[j]; 1243 } 1244 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1245 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1246 } 1247 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1248 1249 /* store the column values to the file */ 1250 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1251 if (!rank) { 1252 MPI_Status status; 1253 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1254 for (i=1; i<size; i++) { 1255 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1256 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1257 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1258 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1259 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1260 } 1261 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1262 } else { 1263 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1264 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1265 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1266 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1267 } 1268 ierr = PetscFree(column_values);CHKERRQ(ierr); 1269 1270 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1271 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #include <petscdraw.h> 1276 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1277 { 1278 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1279 PetscErrorCode ierr; 1280 PetscMPIInt rank = aij->rank,size = aij->size; 1281 PetscBool isdraw,iascii,isbinary; 1282 PetscViewer sviewer; 1283 PetscViewerFormat format; 1284 1285 PetscFunctionBegin; 1286 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1287 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1288 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1289 if (iascii) { 1290 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 MatInfo info; 1293 PetscBool inodes; 1294 1295 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1296 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1297 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1298 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1299 if (!inodes) { 1300 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1301 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1302 } else { 1303 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1304 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1305 } 1306 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1308 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1309 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1310 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1311 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1312 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1313 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1316 PetscInt inodecount,inodelimit,*inodes; 1317 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1318 if (inodes) { 1319 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1320 } else { 1321 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1322 } 1323 PetscFunctionReturn(0); 1324 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1325 PetscFunctionReturn(0); 1326 } 1327 } else if (isbinary) { 1328 if (size == 1) { 1329 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1330 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1331 } else { 1332 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1333 } 1334 PetscFunctionReturn(0); 1335 } else if (isdraw) { 1336 PetscDraw draw; 1337 PetscBool isnull; 1338 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1339 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1340 if (isnull) PetscFunctionReturn(0); 1341 } 1342 1343 { 1344 /* assemble the entire matrix onto first processor. */ 1345 Mat A; 1346 Mat_SeqAIJ *Aloc; 1347 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1348 MatScalar *a; 1349 1350 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1351 if (!rank) { 1352 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1353 } else { 1354 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1355 } 1356 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1357 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1358 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1359 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1360 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1361 1362 /* copy over the A part */ 1363 Aloc = (Mat_SeqAIJ*)aij->A->data; 1364 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1365 row = mat->rmap->rstart; 1366 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1367 for (i=0; i<m; i++) { 1368 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1369 row++; 1370 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1371 } 1372 aj = Aloc->j; 1373 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1374 1375 /* copy over the B part */ 1376 Aloc = (Mat_SeqAIJ*)aij->B->data; 1377 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1378 row = mat->rmap->rstart; 1379 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1380 ct = cols; 1381 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1382 for (i=0; i<m; i++) { 1383 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1384 row++; 1385 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1386 } 1387 ierr = PetscFree(ct);CHKERRQ(ierr); 1388 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1389 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 /* 1391 Everyone has to call to draw the matrix since the graphics waits are 1392 synchronized across all processors that share the PetscDraw object 1393 */ 1394 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1395 if (!rank) { 1396 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1397 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1398 } 1399 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1400 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1401 ierr = MatDestroy(&A);CHKERRQ(ierr); 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1407 { 1408 PetscErrorCode ierr; 1409 PetscBool iascii,isdraw,issocket,isbinary; 1410 1411 PetscFunctionBegin; 1412 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1413 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1414 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1415 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1416 if (iascii || isdraw || isbinary || issocket) { 1417 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1418 } 1419 PetscFunctionReturn(0); 1420 } 1421 1422 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1423 { 1424 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1425 PetscErrorCode ierr; 1426 Vec bb1 = 0; 1427 PetscBool hasop; 1428 1429 PetscFunctionBegin; 1430 if (flag == SOR_APPLY_UPPER) { 1431 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } 1434 1435 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1436 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1437 } 1438 1439 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1440 if (flag & SOR_ZERO_INITIAL_GUESS) { 1441 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1442 its--; 1443 } 1444 1445 while (its--) { 1446 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1447 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1448 1449 /* update rhs: bb1 = bb - B*x */ 1450 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1451 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1452 1453 /* local sweep */ 1454 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1455 } 1456 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1457 if (flag & SOR_ZERO_INITIAL_GUESS) { 1458 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1459 its--; 1460 } 1461 while (its--) { 1462 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1463 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1464 1465 /* update rhs: bb1 = bb - B*x */ 1466 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1467 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1468 1469 /* local sweep */ 1470 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1471 } 1472 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1473 if (flag & SOR_ZERO_INITIAL_GUESS) { 1474 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1475 its--; 1476 } 1477 while (its--) { 1478 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1479 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1480 1481 /* update rhs: bb1 = bb - B*x */ 1482 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1483 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1484 1485 /* local sweep */ 1486 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1487 } 1488 } else if (flag & SOR_EISENSTAT) { 1489 Vec xx1; 1490 1491 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1492 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1493 1494 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1495 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 if (!mat->diag) { 1497 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1498 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1499 } 1500 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1501 if (hasop) { 1502 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1503 } else { 1504 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1505 } 1506 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1507 1508 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1509 1510 /* local sweep */ 1511 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1512 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1513 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1514 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1515 1516 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1517 1518 matin->factorerrortype = mat->A->factorerrortype; 1519 PetscFunctionReturn(0); 1520 } 1521 1522 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1523 { 1524 Mat aA,aB,Aperm; 1525 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1526 PetscScalar *aa,*ba; 1527 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1528 PetscSF rowsf,sf; 1529 IS parcolp = NULL; 1530 PetscBool done; 1531 PetscErrorCode ierr; 1532 1533 PetscFunctionBegin; 1534 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1535 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1536 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1537 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1538 1539 /* Invert row permutation to find out where my rows should go */ 1540 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1541 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1542 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1543 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1544 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1545 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1546 1547 /* Invert column permutation to find out where my columns should go */ 1548 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1549 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1550 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1551 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1552 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1553 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1554 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1555 1556 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1557 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1558 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1559 1560 /* Find out where my gcols should go */ 1561 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1562 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1563 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1564 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1565 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1566 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1567 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1568 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1569 1570 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1571 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1572 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1573 for (i=0; i<m; i++) { 1574 PetscInt row = rdest[i],rowner; 1575 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1576 for (j=ai[i]; j<ai[i+1]; j++) { 1577 PetscInt cowner,col = cdest[aj[j]]; 1578 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1579 if (rowner == cowner) dnnz[i]++; 1580 else onnz[i]++; 1581 } 1582 for (j=bi[i]; j<bi[i+1]; j++) { 1583 PetscInt cowner,col = gcdest[bj[j]]; 1584 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1585 if (rowner == cowner) dnnz[i]++; 1586 else onnz[i]++; 1587 } 1588 } 1589 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1590 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1591 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1592 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1593 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1594 1595 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1596 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1597 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1598 for (i=0; i<m; i++) { 1599 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1600 PetscInt j0,rowlen; 1601 rowlen = ai[i+1] - ai[i]; 1602 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1603 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1604 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1605 } 1606 rowlen = bi[i+1] - bi[i]; 1607 for (j0=j=0; j<rowlen; j0=j) { 1608 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1609 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1610 } 1611 } 1612 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1613 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1614 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1615 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1616 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1617 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1618 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1619 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1620 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1621 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1622 *B = Aperm; 1623 PetscFunctionReturn(0); 1624 } 1625 1626 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1627 { 1628 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1629 PetscErrorCode ierr; 1630 1631 PetscFunctionBegin; 1632 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1633 if (ghosts) *ghosts = aij->garray; 1634 PetscFunctionReturn(0); 1635 } 1636 1637 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1638 { 1639 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1640 Mat A = mat->A,B = mat->B; 1641 PetscErrorCode ierr; 1642 PetscReal isend[5],irecv[5]; 1643 1644 PetscFunctionBegin; 1645 info->block_size = 1.0; 1646 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1647 1648 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1649 isend[3] = info->memory; isend[4] = info->mallocs; 1650 1651 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1652 1653 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1654 isend[3] += info->memory; isend[4] += info->mallocs; 1655 if (flag == MAT_LOCAL) { 1656 info->nz_used = isend[0]; 1657 info->nz_allocated = isend[1]; 1658 info->nz_unneeded = isend[2]; 1659 info->memory = isend[3]; 1660 info->mallocs = isend[4]; 1661 } else if (flag == MAT_GLOBAL_MAX) { 1662 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1663 1664 info->nz_used = irecv[0]; 1665 info->nz_allocated = irecv[1]; 1666 info->nz_unneeded = irecv[2]; 1667 info->memory = irecv[3]; 1668 info->mallocs = irecv[4]; 1669 } else if (flag == MAT_GLOBAL_SUM) { 1670 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1671 1672 info->nz_used = irecv[0]; 1673 info->nz_allocated = irecv[1]; 1674 info->nz_unneeded = irecv[2]; 1675 info->memory = irecv[3]; 1676 info->mallocs = irecv[4]; 1677 } 1678 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1679 info->fill_ratio_needed = 0; 1680 info->factor_mallocs = 0; 1681 PetscFunctionReturn(0); 1682 } 1683 1684 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1685 { 1686 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1687 PetscErrorCode ierr; 1688 1689 PetscFunctionBegin; 1690 switch (op) { 1691 case MAT_NEW_NONZERO_LOCATIONS: 1692 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1693 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1694 case MAT_KEEP_NONZERO_PATTERN: 1695 case MAT_NEW_NONZERO_LOCATION_ERR: 1696 case MAT_USE_INODES: 1697 case MAT_IGNORE_ZERO_ENTRIES: 1698 MatCheckPreallocated(A,1); 1699 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1700 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1701 break; 1702 case MAT_ROW_ORIENTED: 1703 MatCheckPreallocated(A,1); 1704 a->roworiented = flg; 1705 1706 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1707 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1708 break; 1709 case MAT_NEW_DIAGONALS: 1710 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1711 break; 1712 case MAT_IGNORE_OFF_PROC_ENTRIES: 1713 a->donotstash = flg; 1714 break; 1715 case MAT_SPD: 1716 A->spd_set = PETSC_TRUE; 1717 A->spd = flg; 1718 if (flg) { 1719 A->symmetric = PETSC_TRUE; 1720 A->structurally_symmetric = PETSC_TRUE; 1721 A->symmetric_set = PETSC_TRUE; 1722 A->structurally_symmetric_set = PETSC_TRUE; 1723 } 1724 break; 1725 case MAT_SYMMETRIC: 1726 MatCheckPreallocated(A,1); 1727 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1728 break; 1729 case MAT_STRUCTURALLY_SYMMETRIC: 1730 MatCheckPreallocated(A,1); 1731 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1732 break; 1733 case MAT_HERMITIAN: 1734 MatCheckPreallocated(A,1); 1735 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1736 break; 1737 case MAT_SYMMETRY_ETERNAL: 1738 MatCheckPreallocated(A,1); 1739 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1740 break; 1741 case MAT_SUBMAT_SINGLEIS: 1742 A->submat_singleis = flg; 1743 break; 1744 case MAT_STRUCTURE_ONLY: 1745 /* The option is handled directly by MatSetOption() */ 1746 break; 1747 default: 1748 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1749 } 1750 PetscFunctionReturn(0); 1751 } 1752 1753 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1754 { 1755 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1756 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1757 PetscErrorCode ierr; 1758 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1759 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1760 PetscInt *cmap,*idx_p; 1761 1762 PetscFunctionBegin; 1763 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1764 mat->getrowactive = PETSC_TRUE; 1765 1766 if (!mat->rowvalues && (idx || v)) { 1767 /* 1768 allocate enough space to hold information from the longest row. 1769 */ 1770 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1771 PetscInt max = 1,tmp; 1772 for (i=0; i<matin->rmap->n; i++) { 1773 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1774 if (max < tmp) max = tmp; 1775 } 1776 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1777 } 1778 1779 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1780 lrow = row - rstart; 1781 1782 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1783 if (!v) {pvA = 0; pvB = 0;} 1784 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1785 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1786 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1787 nztot = nzA + nzB; 1788 1789 cmap = mat->garray; 1790 if (v || idx) { 1791 if (nztot) { 1792 /* Sort by increasing column numbers, assuming A and B already sorted */ 1793 PetscInt imark = -1; 1794 if (v) { 1795 *v = v_p = mat->rowvalues; 1796 for (i=0; i<nzB; i++) { 1797 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1798 else break; 1799 } 1800 imark = i; 1801 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1802 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1803 } 1804 if (idx) { 1805 *idx = idx_p = mat->rowindices; 1806 if (imark > -1) { 1807 for (i=0; i<imark; i++) { 1808 idx_p[i] = cmap[cworkB[i]]; 1809 } 1810 } else { 1811 for (i=0; i<nzB; i++) { 1812 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1813 else break; 1814 } 1815 imark = i; 1816 } 1817 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1818 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1819 } 1820 } else { 1821 if (idx) *idx = 0; 1822 if (v) *v = 0; 1823 } 1824 } 1825 *nz = nztot; 1826 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1827 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1828 PetscFunctionReturn(0); 1829 } 1830 1831 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1832 { 1833 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1834 1835 PetscFunctionBegin; 1836 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1837 aij->getrowactive = PETSC_FALSE; 1838 PetscFunctionReturn(0); 1839 } 1840 1841 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1842 { 1843 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1844 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1845 PetscErrorCode ierr; 1846 PetscInt i,j,cstart = mat->cmap->rstart; 1847 PetscReal sum = 0.0; 1848 MatScalar *v; 1849 1850 PetscFunctionBegin; 1851 if (aij->size == 1) { 1852 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1853 } else { 1854 if (type == NORM_FROBENIUS) { 1855 v = amat->a; 1856 for (i=0; i<amat->nz; i++) { 1857 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1858 } 1859 v = bmat->a; 1860 for (i=0; i<bmat->nz; i++) { 1861 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1862 } 1863 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1864 *norm = PetscSqrtReal(*norm); 1865 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1866 } else if (type == NORM_1) { /* max column norm */ 1867 PetscReal *tmp,*tmp2; 1868 PetscInt *jj,*garray = aij->garray; 1869 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1870 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1871 *norm = 0.0; 1872 v = amat->a; jj = amat->j; 1873 for (j=0; j<amat->nz; j++) { 1874 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1875 } 1876 v = bmat->a; jj = bmat->j; 1877 for (j=0; j<bmat->nz; j++) { 1878 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1879 } 1880 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1881 for (j=0; j<mat->cmap->N; j++) { 1882 if (tmp2[j] > *norm) *norm = tmp2[j]; 1883 } 1884 ierr = PetscFree(tmp);CHKERRQ(ierr); 1885 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1886 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1887 } else if (type == NORM_INFINITY) { /* max row norm */ 1888 PetscReal ntemp = 0.0; 1889 for (j=0; j<aij->A->rmap->n; j++) { 1890 v = amat->a + amat->i[j]; 1891 sum = 0.0; 1892 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1893 sum += PetscAbsScalar(*v); v++; 1894 } 1895 v = bmat->a + bmat->i[j]; 1896 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1897 sum += PetscAbsScalar(*v); v++; 1898 } 1899 if (sum > ntemp) ntemp = sum; 1900 } 1901 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1902 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1903 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1904 } 1905 PetscFunctionReturn(0); 1906 } 1907 1908 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1909 { 1910 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1911 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1912 PetscErrorCode ierr; 1913 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1914 PetscInt cstart = A->cmap->rstart,ncol; 1915 Mat B; 1916 MatScalar *array; 1917 1918 PetscFunctionBegin; 1919 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1920 1921 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1922 ai = Aloc->i; aj = Aloc->j; 1923 bi = Bloc->i; bj = Bloc->j; 1924 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1925 PetscInt *d_nnz,*g_nnz,*o_nnz; 1926 PetscSFNode *oloc; 1927 PETSC_UNUSED PetscSF sf; 1928 1929 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1930 /* compute d_nnz for preallocation */ 1931 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1932 for (i=0; i<ai[ma]; i++) { 1933 d_nnz[aj[i]]++; 1934 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1935 } 1936 /* compute local off-diagonal contributions */ 1937 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1938 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1939 /* map those to global */ 1940 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1941 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1942 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1943 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1944 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1945 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1946 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1947 1948 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1949 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1950 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1951 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1952 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1953 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1954 } else { 1955 B = *matout; 1956 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1957 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1958 } 1959 1960 /* copy over the A part */ 1961 array = Aloc->a; 1962 row = A->rmap->rstart; 1963 for (i=0; i<ma; i++) { 1964 ncol = ai[i+1]-ai[i]; 1965 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1966 row++; 1967 array += ncol; aj += ncol; 1968 } 1969 aj = Aloc->j; 1970 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1971 1972 /* copy over the B part */ 1973 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1974 array = Bloc->a; 1975 row = A->rmap->rstart; 1976 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1977 cols_tmp = cols; 1978 for (i=0; i<mb; i++) { 1979 ncol = bi[i+1]-bi[i]; 1980 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1981 row++; 1982 array += ncol; cols_tmp += ncol; 1983 } 1984 ierr = PetscFree(cols);CHKERRQ(ierr); 1985 1986 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1987 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1989 *matout = B; 1990 } else { 1991 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1992 } 1993 PetscFunctionReturn(0); 1994 } 1995 1996 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1997 { 1998 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1999 Mat a = aij->A,b = aij->B; 2000 PetscErrorCode ierr; 2001 PetscInt s1,s2,s3; 2002 2003 PetscFunctionBegin; 2004 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2005 if (rr) { 2006 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2007 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2008 /* Overlap communication with computation. */ 2009 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2010 } 2011 if (ll) { 2012 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2013 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2014 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2015 } 2016 /* scale the diagonal block */ 2017 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2018 2019 if (rr) { 2020 /* Do a scatter end and then right scale the off-diagonal block */ 2021 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2022 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2023 } 2024 PetscFunctionReturn(0); 2025 } 2026 2027 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2028 { 2029 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2030 PetscErrorCode ierr; 2031 2032 PetscFunctionBegin; 2033 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2038 { 2039 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2040 Mat a,b,c,d; 2041 PetscBool flg; 2042 PetscErrorCode ierr; 2043 2044 PetscFunctionBegin; 2045 a = matA->A; b = matA->B; 2046 c = matB->A; d = matB->B; 2047 2048 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2049 if (flg) { 2050 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2051 } 2052 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2057 { 2058 PetscErrorCode ierr; 2059 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2060 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2061 2062 PetscFunctionBegin; 2063 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2064 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2065 /* because of the column compression in the off-processor part of the matrix a->B, 2066 the number of columns in a->B and b->B may be different, hence we cannot call 2067 the MatCopy() directly on the two parts. If need be, we can provide a more 2068 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2069 then copying the submatrices */ 2070 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2071 } else { 2072 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2073 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2074 } 2075 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2080 { 2081 PetscErrorCode ierr; 2082 2083 PetscFunctionBegin; 2084 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2085 PetscFunctionReturn(0); 2086 } 2087 2088 /* 2089 Computes the number of nonzeros per row needed for preallocation when X and Y 2090 have different nonzero structure. 2091 */ 2092 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2093 { 2094 PetscInt i,j,k,nzx,nzy; 2095 2096 PetscFunctionBegin; 2097 /* Set the number of nonzeros in the new matrix */ 2098 for (i=0; i<m; i++) { 2099 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2100 nzx = xi[i+1] - xi[i]; 2101 nzy = yi[i+1] - yi[i]; 2102 nnz[i] = 0; 2103 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2104 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2105 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2106 nnz[i]++; 2107 } 2108 for (; k<nzy; k++) nnz[i]++; 2109 } 2110 PetscFunctionReturn(0); 2111 } 2112 2113 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2114 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2115 { 2116 PetscErrorCode ierr; 2117 PetscInt m = Y->rmap->N; 2118 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2119 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2120 2121 PetscFunctionBegin; 2122 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2127 { 2128 PetscErrorCode ierr; 2129 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2130 PetscBLASInt bnz,one=1; 2131 Mat_SeqAIJ *x,*y; 2132 2133 PetscFunctionBegin; 2134 if (str == SAME_NONZERO_PATTERN) { 2135 PetscScalar alpha = a; 2136 x = (Mat_SeqAIJ*)xx->A->data; 2137 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2138 y = (Mat_SeqAIJ*)yy->A->data; 2139 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2140 x = (Mat_SeqAIJ*)xx->B->data; 2141 y = (Mat_SeqAIJ*)yy->B->data; 2142 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2143 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2144 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2145 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2146 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2147 } else { 2148 Mat B; 2149 PetscInt *nnz_d,*nnz_o; 2150 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2151 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2152 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2153 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2154 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2155 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2156 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2157 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2158 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2159 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2160 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2161 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2162 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2163 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2169 2170 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2171 { 2172 #if defined(PETSC_USE_COMPLEX) 2173 PetscErrorCode ierr; 2174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2175 2176 PetscFunctionBegin; 2177 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2178 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2179 #else 2180 PetscFunctionBegin; 2181 #endif 2182 PetscFunctionReturn(0); 2183 } 2184 2185 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2186 { 2187 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2188 PetscErrorCode ierr; 2189 2190 PetscFunctionBegin; 2191 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2192 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2193 PetscFunctionReturn(0); 2194 } 2195 2196 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2197 { 2198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2203 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2208 { 2209 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2210 PetscErrorCode ierr; 2211 PetscInt i,*idxb = 0; 2212 PetscScalar *va,*vb; 2213 Vec vtmp; 2214 2215 PetscFunctionBegin; 2216 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2217 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2218 if (idx) { 2219 for (i=0; i<A->rmap->n; i++) { 2220 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2221 } 2222 } 2223 2224 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2225 if (idx) { 2226 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2227 } 2228 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2229 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2230 2231 for (i=0; i<A->rmap->n; i++) { 2232 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2233 va[i] = vb[i]; 2234 if (idx) idx[i] = a->garray[idxb[i]]; 2235 } 2236 } 2237 2238 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2239 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2240 ierr = PetscFree(idxb);CHKERRQ(ierr); 2241 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2242 PetscFunctionReturn(0); 2243 } 2244 2245 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2246 { 2247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2248 PetscErrorCode ierr; 2249 PetscInt i,*idxb = 0; 2250 PetscScalar *va,*vb; 2251 Vec vtmp; 2252 2253 PetscFunctionBegin; 2254 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2255 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2256 if (idx) { 2257 for (i=0; i<A->cmap->n; i++) { 2258 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2259 } 2260 } 2261 2262 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2263 if (idx) { 2264 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2265 } 2266 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2267 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2268 2269 for (i=0; i<A->rmap->n; i++) { 2270 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2271 va[i] = vb[i]; 2272 if (idx) idx[i] = a->garray[idxb[i]]; 2273 } 2274 } 2275 2276 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2277 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2278 ierr = PetscFree(idxb);CHKERRQ(ierr); 2279 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2280 PetscFunctionReturn(0); 2281 } 2282 2283 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2284 { 2285 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2286 PetscInt n = A->rmap->n; 2287 PetscInt cstart = A->cmap->rstart; 2288 PetscInt *cmap = mat->garray; 2289 PetscInt *diagIdx, *offdiagIdx; 2290 Vec diagV, offdiagV; 2291 PetscScalar *a, *diagA, *offdiagA; 2292 PetscInt r; 2293 PetscErrorCode ierr; 2294 2295 PetscFunctionBegin; 2296 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2297 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2298 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2299 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2300 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2301 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2302 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2303 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2304 for (r = 0; r < n; ++r) { 2305 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2306 a[r] = diagA[r]; 2307 idx[r] = cstart + diagIdx[r]; 2308 } else { 2309 a[r] = offdiagA[r]; 2310 idx[r] = cmap[offdiagIdx[r]]; 2311 } 2312 } 2313 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2314 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2315 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2316 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2317 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2318 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2323 { 2324 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2325 PetscInt n = A->rmap->n; 2326 PetscInt cstart = A->cmap->rstart; 2327 PetscInt *cmap = mat->garray; 2328 PetscInt *diagIdx, *offdiagIdx; 2329 Vec diagV, offdiagV; 2330 PetscScalar *a, *diagA, *offdiagA; 2331 PetscInt r; 2332 PetscErrorCode ierr; 2333 2334 PetscFunctionBegin; 2335 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2336 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2337 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2338 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2339 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2340 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2341 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2342 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2343 for (r = 0; r < n; ++r) { 2344 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2345 a[r] = diagA[r]; 2346 idx[r] = cstart + diagIdx[r]; 2347 } else { 2348 a[r] = offdiagA[r]; 2349 idx[r] = cmap[offdiagIdx[r]]; 2350 } 2351 } 2352 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2353 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2354 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2355 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2356 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2357 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2358 PetscFunctionReturn(0); 2359 } 2360 2361 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2362 { 2363 PetscErrorCode ierr; 2364 Mat *dummy; 2365 2366 PetscFunctionBegin; 2367 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2368 *newmat = *dummy; 2369 ierr = PetscFree(dummy);CHKERRQ(ierr); 2370 PetscFunctionReturn(0); 2371 } 2372 2373 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2374 { 2375 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2376 PetscErrorCode ierr; 2377 2378 PetscFunctionBegin; 2379 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2380 A->factorerrortype = a->A->factorerrortype; 2381 PetscFunctionReturn(0); 2382 } 2383 2384 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2385 { 2386 PetscErrorCode ierr; 2387 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2388 2389 PetscFunctionBegin; 2390 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2391 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2392 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2398 { 2399 PetscFunctionBegin; 2400 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2401 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2402 PetscFunctionReturn(0); 2403 } 2404 2405 /*@ 2406 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2407 2408 Collective on Mat 2409 2410 Input Parameters: 2411 + A - the matrix 2412 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2413 2414 Level: advanced 2415 2416 @*/ 2417 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2418 { 2419 PetscErrorCode ierr; 2420 2421 PetscFunctionBegin; 2422 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2423 PetscFunctionReturn(0); 2424 } 2425 2426 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2427 { 2428 PetscErrorCode ierr; 2429 PetscBool sc = PETSC_FALSE,flg; 2430 2431 PetscFunctionBegin; 2432 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2433 ierr = PetscObjectOptionsBegin((PetscObject)A); 2434 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2435 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2436 if (flg) { 2437 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2438 } 2439 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2444 { 2445 PetscErrorCode ierr; 2446 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2447 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2448 2449 PetscFunctionBegin; 2450 if (!Y->preallocated) { 2451 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2452 } else if (!aij->nz) { 2453 PetscInt nonew = aij->nonew; 2454 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2455 aij->nonew = nonew; 2456 } 2457 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2458 PetscFunctionReturn(0); 2459 } 2460 2461 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2462 { 2463 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2464 PetscErrorCode ierr; 2465 2466 PetscFunctionBegin; 2467 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2468 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2469 if (d) { 2470 PetscInt rstart; 2471 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2472 *d += rstart; 2473 2474 } 2475 PetscFunctionReturn(0); 2476 } 2477 2478 2479 /* -------------------------------------------------------------------*/ 2480 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2481 MatGetRow_MPIAIJ, 2482 MatRestoreRow_MPIAIJ, 2483 MatMult_MPIAIJ, 2484 /* 4*/ MatMultAdd_MPIAIJ, 2485 MatMultTranspose_MPIAIJ, 2486 MatMultTransposeAdd_MPIAIJ, 2487 0, 2488 0, 2489 0, 2490 /*10*/ 0, 2491 0, 2492 0, 2493 MatSOR_MPIAIJ, 2494 MatTranspose_MPIAIJ, 2495 /*15*/ MatGetInfo_MPIAIJ, 2496 MatEqual_MPIAIJ, 2497 MatGetDiagonal_MPIAIJ, 2498 MatDiagonalScale_MPIAIJ, 2499 MatNorm_MPIAIJ, 2500 /*20*/ MatAssemblyBegin_MPIAIJ, 2501 MatAssemblyEnd_MPIAIJ, 2502 MatSetOption_MPIAIJ, 2503 MatZeroEntries_MPIAIJ, 2504 /*24*/ MatZeroRows_MPIAIJ, 2505 0, 2506 0, 2507 0, 2508 0, 2509 /*29*/ MatSetUp_MPIAIJ, 2510 0, 2511 0, 2512 MatGetDiagonalBlock_MPIAIJ, 2513 0, 2514 /*34*/ MatDuplicate_MPIAIJ, 2515 0, 2516 0, 2517 0, 2518 0, 2519 /*39*/ MatAXPY_MPIAIJ, 2520 MatCreateSubMatrices_MPIAIJ, 2521 MatIncreaseOverlap_MPIAIJ, 2522 MatGetValues_MPIAIJ, 2523 MatCopy_MPIAIJ, 2524 /*44*/ MatGetRowMax_MPIAIJ, 2525 MatScale_MPIAIJ, 2526 MatShift_MPIAIJ, 2527 MatDiagonalSet_MPIAIJ, 2528 MatZeroRowsColumns_MPIAIJ, 2529 /*49*/ MatSetRandom_MPIAIJ, 2530 0, 2531 0, 2532 0, 2533 0, 2534 /*54*/ MatFDColoringCreate_MPIXAIJ, 2535 0, 2536 MatSetUnfactored_MPIAIJ, 2537 MatPermute_MPIAIJ, 2538 0, 2539 /*59*/ MatCreateSubMatrix_MPIAIJ, 2540 MatDestroy_MPIAIJ, 2541 MatView_MPIAIJ, 2542 0, 2543 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2544 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2545 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2546 0, 2547 0, 2548 0, 2549 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2550 MatGetRowMinAbs_MPIAIJ, 2551 0, 2552 0, 2553 0, 2554 0, 2555 /*75*/ MatFDColoringApply_AIJ, 2556 MatSetFromOptions_MPIAIJ, 2557 0, 2558 0, 2559 MatFindZeroDiagonals_MPIAIJ, 2560 /*80*/ 0, 2561 0, 2562 0, 2563 /*83*/ MatLoad_MPIAIJ, 2564 MatIsSymmetric_MPIAIJ, 2565 0, 2566 0, 2567 0, 2568 0, 2569 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2570 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2571 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2572 MatPtAP_MPIAIJ_MPIAIJ, 2573 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2574 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2575 0, 2576 0, 2577 0, 2578 0, 2579 /*99*/ 0, 2580 0, 2581 0, 2582 MatConjugate_MPIAIJ, 2583 0, 2584 /*104*/MatSetValuesRow_MPIAIJ, 2585 MatRealPart_MPIAIJ, 2586 MatImaginaryPart_MPIAIJ, 2587 0, 2588 0, 2589 /*109*/0, 2590 0, 2591 MatGetRowMin_MPIAIJ, 2592 0, 2593 MatMissingDiagonal_MPIAIJ, 2594 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2595 0, 2596 MatGetGhosts_MPIAIJ, 2597 0, 2598 0, 2599 /*119*/0, 2600 0, 2601 0, 2602 0, 2603 MatGetMultiProcBlock_MPIAIJ, 2604 /*124*/MatFindNonzeroRows_MPIAIJ, 2605 MatGetColumnNorms_MPIAIJ, 2606 MatInvertBlockDiagonal_MPIAIJ, 2607 0, 2608 MatCreateSubMatricesMPI_MPIAIJ, 2609 /*129*/0, 2610 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2611 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2612 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2613 0, 2614 /*134*/0, 2615 0, 2616 MatRARt_MPIAIJ_MPIAIJ, 2617 0, 2618 0, 2619 /*139*/MatSetBlockSizes_MPIAIJ, 2620 0, 2621 0, 2622 MatFDColoringSetUp_MPIXAIJ, 2623 MatFindOffBlockDiagonalEntries_MPIAIJ, 2624 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2625 }; 2626 2627 /* ----------------------------------------------------------------------------------------*/ 2628 2629 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2630 { 2631 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2632 PetscErrorCode ierr; 2633 2634 PetscFunctionBegin; 2635 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2636 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2637 PetscFunctionReturn(0); 2638 } 2639 2640 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2641 { 2642 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2643 PetscErrorCode ierr; 2644 2645 PetscFunctionBegin; 2646 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2647 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2648 PetscFunctionReturn(0); 2649 } 2650 2651 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2652 { 2653 Mat_MPIAIJ *b; 2654 PetscErrorCode ierr; 2655 2656 PetscFunctionBegin; 2657 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2658 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2659 b = (Mat_MPIAIJ*)B->data; 2660 2661 #if defined(PETSC_USE_CTABLE) 2662 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2663 #else 2664 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2665 #endif 2666 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2667 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2668 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2669 2670 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2671 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2672 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2673 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2674 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2675 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2676 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2677 2678 if (!B->preallocated) { 2679 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2680 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2681 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2682 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2683 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2684 } 2685 2686 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2687 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2688 B->preallocated = PETSC_TRUE; 2689 B->was_assembled = PETSC_FALSE; 2690 B->assembled = PETSC_FALSE;; 2691 PetscFunctionReturn(0); 2692 } 2693 2694 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2695 { 2696 Mat_MPIAIJ *b; 2697 PetscErrorCode ierr; 2698 2699 PetscFunctionBegin; 2700 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2701 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2702 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2703 b = (Mat_MPIAIJ*)B->data; 2704 2705 #if defined(PETSC_USE_CTABLE) 2706 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2707 #else 2708 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2709 #endif 2710 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2711 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2712 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2713 2714 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2715 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2716 B->preallocated = PETSC_TRUE; 2717 B->was_assembled = PETSC_FALSE; 2718 B->assembled = PETSC_FALSE; 2719 PetscFunctionReturn(0); 2720 } 2721 2722 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2723 { 2724 Mat mat; 2725 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2726 PetscErrorCode ierr; 2727 2728 PetscFunctionBegin; 2729 *newmat = 0; 2730 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2731 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2732 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2733 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2734 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2735 a = (Mat_MPIAIJ*)mat->data; 2736 2737 mat->factortype = matin->factortype; 2738 mat->assembled = PETSC_TRUE; 2739 mat->insertmode = NOT_SET_VALUES; 2740 mat->preallocated = PETSC_TRUE; 2741 2742 a->size = oldmat->size; 2743 a->rank = oldmat->rank; 2744 a->donotstash = oldmat->donotstash; 2745 a->roworiented = oldmat->roworiented; 2746 a->rowindices = 0; 2747 a->rowvalues = 0; 2748 a->getrowactive = PETSC_FALSE; 2749 2750 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2751 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2752 2753 if (oldmat->colmap) { 2754 #if defined(PETSC_USE_CTABLE) 2755 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2756 #else 2757 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2758 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2759 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2760 #endif 2761 } else a->colmap = 0; 2762 if (oldmat->garray) { 2763 PetscInt len; 2764 len = oldmat->B->cmap->n; 2765 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2766 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2767 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2768 } else a->garray = 0; 2769 2770 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2771 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2772 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2773 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2774 2775 if (oldmat->Mvctx_mpi1) { 2776 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2777 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2778 } 2779 2780 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2781 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2782 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2783 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2784 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2785 *newmat = mat; 2786 PetscFunctionReturn(0); 2787 } 2788 2789 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2790 { 2791 PetscScalar *vals,*svals; 2792 MPI_Comm comm; 2793 PetscErrorCode ierr; 2794 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2795 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2796 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2797 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2798 PetscInt cend,cstart,n,*rowners; 2799 int fd; 2800 PetscInt bs = newMat->rmap->bs; 2801 2802 PetscFunctionBegin; 2803 /* force binary viewer to load .info file if it has not yet done so */ 2804 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2805 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2806 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2807 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2808 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2809 if (!rank) { 2810 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2811 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2812 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2813 } 2814 2815 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2816 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2817 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2818 if (bs < 0) bs = 1; 2819 2820 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2821 M = header[1]; N = header[2]; 2822 2823 /* If global sizes are set, check if they are consistent with that given in the file */ 2824 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2825 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2826 2827 /* determine ownership of all (block) rows */ 2828 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2829 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2830 else m = newMat->rmap->n; /* Set by user */ 2831 2832 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2833 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2834 2835 /* First process needs enough room for process with most rows */ 2836 if (!rank) { 2837 mmax = rowners[1]; 2838 for (i=2; i<=size; i++) { 2839 mmax = PetscMax(mmax, rowners[i]); 2840 } 2841 } else mmax = -1; /* unused, but compilers complain */ 2842 2843 rowners[0] = 0; 2844 for (i=2; i<=size; i++) { 2845 rowners[i] += rowners[i-1]; 2846 } 2847 rstart = rowners[rank]; 2848 rend = rowners[rank+1]; 2849 2850 /* distribute row lengths to all processors */ 2851 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2852 if (!rank) { 2853 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2854 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2855 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2856 for (j=0; j<m; j++) { 2857 procsnz[0] += ourlens[j]; 2858 } 2859 for (i=1; i<size; i++) { 2860 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2861 /* calculate the number of nonzeros on each processor */ 2862 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2863 procsnz[i] += rowlengths[j]; 2864 } 2865 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2866 } 2867 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2868 } else { 2869 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2870 } 2871 2872 if (!rank) { 2873 /* determine max buffer needed and allocate it */ 2874 maxnz = 0; 2875 for (i=0; i<size; i++) { 2876 maxnz = PetscMax(maxnz,procsnz[i]); 2877 } 2878 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2879 2880 /* read in my part of the matrix column indices */ 2881 nz = procsnz[0]; 2882 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2883 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2884 2885 /* read in every one elses and ship off */ 2886 for (i=1; i<size; i++) { 2887 nz = procsnz[i]; 2888 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2889 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2890 } 2891 ierr = PetscFree(cols);CHKERRQ(ierr); 2892 } else { 2893 /* determine buffer space needed for message */ 2894 nz = 0; 2895 for (i=0; i<m; i++) { 2896 nz += ourlens[i]; 2897 } 2898 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2899 2900 /* receive message of column indices*/ 2901 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2902 } 2903 2904 /* determine column ownership if matrix is not square */ 2905 if (N != M) { 2906 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2907 else n = newMat->cmap->n; 2908 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2909 cstart = cend - n; 2910 } else { 2911 cstart = rstart; 2912 cend = rend; 2913 n = cend - cstart; 2914 } 2915 2916 /* loop over local rows, determining number of off diagonal entries */ 2917 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2918 jj = 0; 2919 for (i=0; i<m; i++) { 2920 for (j=0; j<ourlens[i]; j++) { 2921 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2922 jj++; 2923 } 2924 } 2925 2926 for (i=0; i<m; i++) { 2927 ourlens[i] -= offlens[i]; 2928 } 2929 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2930 2931 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2932 2933 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2934 2935 for (i=0; i<m; i++) { 2936 ourlens[i] += offlens[i]; 2937 } 2938 2939 if (!rank) { 2940 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2941 2942 /* read in my part of the matrix numerical values */ 2943 nz = procsnz[0]; 2944 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2945 2946 /* insert into matrix */ 2947 jj = rstart; 2948 smycols = mycols; 2949 svals = vals; 2950 for (i=0; i<m; i++) { 2951 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2952 smycols += ourlens[i]; 2953 svals += ourlens[i]; 2954 jj++; 2955 } 2956 2957 /* read in other processors and ship out */ 2958 for (i=1; i<size; i++) { 2959 nz = procsnz[i]; 2960 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2961 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2962 } 2963 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2964 } else { 2965 /* receive numeric values */ 2966 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2967 2968 /* receive message of values*/ 2969 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2970 2971 /* insert into matrix */ 2972 jj = rstart; 2973 smycols = mycols; 2974 svals = vals; 2975 for (i=0; i<m; i++) { 2976 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2977 smycols += ourlens[i]; 2978 svals += ourlens[i]; 2979 jj++; 2980 } 2981 } 2982 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2983 ierr = PetscFree(vals);CHKERRQ(ierr); 2984 ierr = PetscFree(mycols);CHKERRQ(ierr); 2985 ierr = PetscFree(rowners);CHKERRQ(ierr); 2986 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2987 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2988 PetscFunctionReturn(0); 2989 } 2990 2991 /* Not scalable because of ISAllGather() unless getting all columns. */ 2992 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2993 { 2994 PetscErrorCode ierr; 2995 IS iscol_local; 2996 PetscBool isstride; 2997 PetscMPIInt lisstride=0,gisstride; 2998 2999 PetscFunctionBegin; 3000 /* check if we are grabbing all columns*/ 3001 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3002 3003 if (isstride) { 3004 PetscInt start,len,mstart,mlen; 3005 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3006 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3007 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3008 if (mstart == start && mlen-mstart == len) lisstride = 1; 3009 } 3010 3011 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3012 if (gisstride) { 3013 PetscInt N; 3014 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3015 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3016 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3017 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3018 } else { 3019 PetscInt cbs; 3020 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3021 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3022 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3023 } 3024 3025 *isseq = iscol_local; 3026 PetscFunctionReturn(0); 3027 } 3028 3029 /* 3030 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3031 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3032 3033 Input Parameters: 3034 mat - matrix 3035 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3036 i.e., mat->rstart <= isrow[i] < mat->rend 3037 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3038 i.e., mat->cstart <= iscol[i] < mat->cend 3039 Output Parameter: 3040 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3041 iscol_o - sequential column index set for retrieving mat->B 3042 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3043 */ 3044 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3045 { 3046 PetscErrorCode ierr; 3047 Vec x,cmap; 3048 const PetscInt *is_idx; 3049 PetscScalar *xarray,*cmaparray; 3050 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3051 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3052 Mat B=a->B; 3053 Vec lvec=a->lvec,lcmap; 3054 PetscInt i,cstart,cend,Bn=B->cmap->N; 3055 MPI_Comm comm; 3056 VecScatter Mvctx=a->Mvctx; 3057 3058 PetscFunctionBegin; 3059 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3060 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3061 3062 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3063 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3064 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3065 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3066 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3067 3068 /* Get start indices */ 3069 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3070 isstart -= ncols; 3071 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3072 3073 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3074 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3075 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3076 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3077 for (i=0; i<ncols; i++) { 3078 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3079 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3080 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3081 } 3082 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3083 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3084 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3085 3086 /* Get iscol_d */ 3087 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3088 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3089 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3090 3091 /* Get isrow_d */ 3092 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3093 rstart = mat->rmap->rstart; 3094 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3095 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3096 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3097 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3098 3099 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3100 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3101 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3102 3103 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3104 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3105 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3106 3107 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3108 3109 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3110 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3111 3112 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3113 /* off-process column indices */ 3114 count = 0; 3115 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3116 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3117 3118 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3119 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3120 for (i=0; i<Bn; i++) { 3121 if (PetscRealPart(xarray[i]) > -1.0) { 3122 idx[count] = i; /* local column index in off-diagonal part B */ 3123 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3124 count++; 3125 } 3126 } 3127 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3128 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3129 3130 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3131 /* cannot ensure iscol_o has same blocksize as iscol! */ 3132 3133 ierr = PetscFree(idx);CHKERRQ(ierr); 3134 *garray = cmap1; 3135 3136 ierr = VecDestroy(&x);CHKERRQ(ierr); 3137 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3138 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3139 PetscFunctionReturn(0); 3140 } 3141 3142 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3143 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3144 { 3145 PetscErrorCode ierr; 3146 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3147 Mat M = NULL; 3148 MPI_Comm comm; 3149 IS iscol_d,isrow_d,iscol_o; 3150 Mat Asub = NULL,Bsub = NULL; 3151 PetscInt n; 3152 3153 PetscFunctionBegin; 3154 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3155 3156 if (call == MAT_REUSE_MATRIX) { 3157 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3158 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3159 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3160 3161 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3162 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3163 3164 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3165 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3166 3167 /* Update diagonal and off-diagonal portions of submat */ 3168 asub = (Mat_MPIAIJ*)(*submat)->data; 3169 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3170 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3171 if (n) { 3172 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3173 } 3174 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3175 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3176 3177 } else { /* call == MAT_INITIAL_MATRIX) */ 3178 const PetscInt *garray; 3179 PetscInt BsubN; 3180 3181 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3182 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3183 3184 /* Create local submatrices Asub and Bsub */ 3185 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3186 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3187 3188 /* Create submatrix M */ 3189 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3190 3191 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3192 asub = (Mat_MPIAIJ*)M->data; 3193 3194 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3195 n = asub->B->cmap->N; 3196 if (BsubN > n) { 3197 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3198 const PetscInt *idx; 3199 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3200 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3201 3202 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3203 j = 0; 3204 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3205 for (i=0; i<n; i++) { 3206 if (j >= BsubN) break; 3207 while (subgarray[i] > garray[j]) j++; 3208 3209 if (subgarray[i] == garray[j]) { 3210 idx_new[i] = idx[j++]; 3211 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3212 } 3213 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3214 3215 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3217 3218 } else if (BsubN < n) { 3219 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3220 } 3221 3222 ierr = PetscFree(garray);CHKERRQ(ierr); 3223 *submat = M; 3224 3225 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3226 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3227 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3228 3229 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3230 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3231 3232 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3233 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3234 } 3235 PetscFunctionReturn(0); 3236 } 3237 3238 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3239 { 3240 PetscErrorCode ierr; 3241 IS iscol_local=NULL,isrow_d; 3242 PetscInt csize; 3243 PetscInt n,i,j,start,end; 3244 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3245 MPI_Comm comm; 3246 3247 PetscFunctionBegin; 3248 /* If isrow has same processor distribution as mat, 3249 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3250 if (call == MAT_REUSE_MATRIX) { 3251 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3252 if (isrow_d) { 3253 sameRowDist = PETSC_TRUE; 3254 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3255 } else { 3256 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3257 if (iscol_local) { 3258 sameRowDist = PETSC_TRUE; 3259 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3260 } 3261 } 3262 } else { 3263 /* Check if isrow has same processor distribution as mat */ 3264 sameDist[0] = PETSC_FALSE; 3265 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3266 if (!n) { 3267 sameDist[0] = PETSC_TRUE; 3268 } else { 3269 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3270 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3271 if (i >= start && j < end) { 3272 sameDist[0] = PETSC_TRUE; 3273 } 3274 } 3275 3276 /* Check if iscol has same processor distribution as mat */ 3277 sameDist[1] = PETSC_FALSE; 3278 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3279 if (!n) { 3280 sameDist[1] = PETSC_TRUE; 3281 } else { 3282 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3283 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3284 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3285 } 3286 3287 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3288 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3289 sameRowDist = tsameDist[0]; 3290 } 3291 3292 if (sameRowDist) { 3293 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3294 /* isrow and iscol have same processor distribution as mat */ 3295 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3296 PetscFunctionReturn(0); 3297 } else { /* sameRowDist */ 3298 /* isrow has same processor distribution as mat */ 3299 if (call == MAT_INITIAL_MATRIX) { 3300 PetscBool sorted; 3301 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3302 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3303 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3304 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3305 3306 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3307 if (sorted) { 3308 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3309 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3310 PetscFunctionReturn(0); 3311 } 3312 } else { /* call == MAT_REUSE_MATRIX */ 3313 IS iscol_sub; 3314 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3315 if (iscol_sub) { 3316 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3317 PetscFunctionReturn(0); 3318 } 3319 } 3320 } 3321 } 3322 3323 /* General case: iscol -> iscol_local which has global size of iscol */ 3324 if (call == MAT_REUSE_MATRIX) { 3325 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3326 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3327 } else { 3328 if (!iscol_local) { 3329 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3330 } 3331 } 3332 3333 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3334 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3335 3336 if (call == MAT_INITIAL_MATRIX) { 3337 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3338 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3339 } 3340 PetscFunctionReturn(0); 3341 } 3342 3343 /*@C 3344 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3345 and "off-diagonal" part of the matrix in CSR format. 3346 3347 Collective on MPI_Comm 3348 3349 Input Parameters: 3350 + comm - MPI communicator 3351 . A - "diagonal" portion of matrix 3352 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3353 - garray - global index of B columns 3354 3355 Output Parameter: 3356 . mat - the matrix, with input A as its local diagonal matrix 3357 Level: advanced 3358 3359 Notes: 3360 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3361 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3362 3363 .seealso: MatCreateMPIAIJWithSplitArrays() 3364 @*/ 3365 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3366 { 3367 PetscErrorCode ierr; 3368 Mat_MPIAIJ *maij; 3369 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3370 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3371 PetscScalar *oa=b->a; 3372 Mat Bnew; 3373 PetscInt m,n,N; 3374 3375 PetscFunctionBegin; 3376 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3377 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3378 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3379 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3380 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3381 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3382 3383 /* Get global columns of mat */ 3384 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3385 3386 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3387 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3388 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3389 maij = (Mat_MPIAIJ*)(*mat)->data; 3390 3391 (*mat)->preallocated = PETSC_TRUE; 3392 3393 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3394 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3395 3396 /* Set A as diagonal portion of *mat */ 3397 maij->A = A; 3398 3399 nz = oi[m]; 3400 for (i=0; i<nz; i++) { 3401 col = oj[i]; 3402 oj[i] = garray[col]; 3403 } 3404 3405 /* Set Bnew as off-diagonal portion of *mat */ 3406 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3407 bnew = (Mat_SeqAIJ*)Bnew->data; 3408 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3409 maij->B = Bnew; 3410 3411 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3412 3413 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3414 b->free_a = PETSC_FALSE; 3415 b->free_ij = PETSC_FALSE; 3416 ierr = MatDestroy(&B);CHKERRQ(ierr); 3417 3418 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3419 bnew->free_a = PETSC_TRUE; 3420 bnew->free_ij = PETSC_TRUE; 3421 3422 /* condense columns of maij->B */ 3423 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3424 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3425 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3426 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3427 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3428 PetscFunctionReturn(0); 3429 } 3430 3431 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3432 3433 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3434 { 3435 PetscErrorCode ierr; 3436 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3437 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3438 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3439 Mat M,Msub,B=a->B; 3440 MatScalar *aa; 3441 Mat_SeqAIJ *aij; 3442 PetscInt *garray = a->garray,*colsub,Ncols; 3443 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3444 IS iscol_sub,iscmap; 3445 const PetscInt *is_idx,*cmap; 3446 PetscBool allcolumns=PETSC_FALSE; 3447 MPI_Comm comm; 3448 3449 PetscFunctionBegin; 3450 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3451 3452 if (call == MAT_REUSE_MATRIX) { 3453 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3454 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3455 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3456 3457 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3458 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3459 3460 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3461 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3462 3463 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3464 3465 } else { /* call == MAT_INITIAL_MATRIX) */ 3466 PetscBool flg; 3467 3468 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3469 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3470 3471 /* (1) iscol -> nonscalable iscol_local */ 3472 /* Check for special case: each processor gets entire matrix columns */ 3473 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3474 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3475 if (allcolumns) { 3476 iscol_sub = iscol_local; 3477 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3478 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3479 3480 } else { 3481 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3482 PetscInt *idx,*cmap1,k; 3483 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3484 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3485 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3486 count = 0; 3487 k = 0; 3488 for (i=0; i<Ncols; i++) { 3489 j = is_idx[i]; 3490 if (j >= cstart && j < cend) { 3491 /* diagonal part of mat */ 3492 idx[count] = j; 3493 cmap1[count++] = i; /* column index in submat */ 3494 } else if (Bn) { 3495 /* off-diagonal part of mat */ 3496 if (j == garray[k]) { 3497 idx[count] = j; 3498 cmap1[count++] = i; /* column index in submat */ 3499 } else if (j > garray[k]) { 3500 while (j > garray[k] && k < Bn-1) k++; 3501 if (j == garray[k]) { 3502 idx[count] = j; 3503 cmap1[count++] = i; /* column index in submat */ 3504 } 3505 } 3506 } 3507 } 3508 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3509 3510 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3511 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3512 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3513 3514 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3515 } 3516 3517 /* (3) Create sequential Msub */ 3518 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3519 } 3520 3521 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3522 aij = (Mat_SeqAIJ*)(Msub)->data; 3523 ii = aij->i; 3524 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3525 3526 /* 3527 m - number of local rows 3528 Ncols - number of columns (same on all processors) 3529 rstart - first row in new global matrix generated 3530 */ 3531 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3532 3533 if (call == MAT_INITIAL_MATRIX) { 3534 /* (4) Create parallel newmat */ 3535 PetscMPIInt rank,size; 3536 PetscInt csize; 3537 3538 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3539 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3540 3541 /* 3542 Determine the number of non-zeros in the diagonal and off-diagonal 3543 portions of the matrix in order to do correct preallocation 3544 */ 3545 3546 /* first get start and end of "diagonal" columns */ 3547 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3548 if (csize == PETSC_DECIDE) { 3549 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3550 if (mglobal == Ncols) { /* square matrix */ 3551 nlocal = m; 3552 } else { 3553 nlocal = Ncols/size + ((Ncols % size) > rank); 3554 } 3555 } else { 3556 nlocal = csize; 3557 } 3558 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3559 rstart = rend - nlocal; 3560 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3561 3562 /* next, compute all the lengths */ 3563 jj = aij->j; 3564 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3565 olens = dlens + m; 3566 for (i=0; i<m; i++) { 3567 jend = ii[i+1] - ii[i]; 3568 olen = 0; 3569 dlen = 0; 3570 for (j=0; j<jend; j++) { 3571 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3572 else dlen++; 3573 jj++; 3574 } 3575 olens[i] = olen; 3576 dlens[i] = dlen; 3577 } 3578 3579 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3580 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3581 3582 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3583 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3584 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3585 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3586 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3587 ierr = PetscFree(dlens);CHKERRQ(ierr); 3588 3589 } else { /* call == MAT_REUSE_MATRIX */ 3590 M = *newmat; 3591 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3592 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3593 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3594 /* 3595 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3596 rather than the slower MatSetValues(). 3597 */ 3598 M->was_assembled = PETSC_TRUE; 3599 M->assembled = PETSC_FALSE; 3600 } 3601 3602 /* (5) Set values of Msub to *newmat */ 3603 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3604 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3605 3606 jj = aij->j; 3607 aa = aij->a; 3608 for (i=0; i<m; i++) { 3609 row = rstart + i; 3610 nz = ii[i+1] - ii[i]; 3611 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3612 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3613 jj += nz; aa += nz; 3614 } 3615 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3616 3617 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3618 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3619 3620 ierr = PetscFree(colsub);CHKERRQ(ierr); 3621 3622 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3623 if (call == MAT_INITIAL_MATRIX) { 3624 *newmat = M; 3625 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3626 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3627 3628 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3629 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3630 3631 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3632 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3633 3634 if (iscol_local) { 3635 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3636 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3637 } 3638 } 3639 PetscFunctionReturn(0); 3640 } 3641 3642 /* 3643 Not great since it makes two copies of the submatrix, first an SeqAIJ 3644 in local and then by concatenating the local matrices the end result. 3645 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3646 3647 Note: This requires a sequential iscol with all indices. 3648 */ 3649 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3650 { 3651 PetscErrorCode ierr; 3652 PetscMPIInt rank,size; 3653 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3654 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3655 Mat M,Mreuse; 3656 MatScalar *aa,*vwork; 3657 MPI_Comm comm; 3658 Mat_SeqAIJ *aij; 3659 PetscBool colflag,allcolumns=PETSC_FALSE; 3660 3661 PetscFunctionBegin; 3662 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3663 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3664 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3665 3666 /* Check for special case: each processor gets entire matrix columns */ 3667 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3668 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3669 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3670 3671 if (call == MAT_REUSE_MATRIX) { 3672 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3673 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3674 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3675 } else { 3676 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3677 } 3678 3679 /* 3680 m - number of local rows 3681 n - number of columns (same on all processors) 3682 rstart - first row in new global matrix generated 3683 */ 3684 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3685 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3686 if (call == MAT_INITIAL_MATRIX) { 3687 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3688 ii = aij->i; 3689 jj = aij->j; 3690 3691 /* 3692 Determine the number of non-zeros in the diagonal and off-diagonal 3693 portions of the matrix in order to do correct preallocation 3694 */ 3695 3696 /* first get start and end of "diagonal" columns */ 3697 if (csize == PETSC_DECIDE) { 3698 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3699 if (mglobal == n) { /* square matrix */ 3700 nlocal = m; 3701 } else { 3702 nlocal = n/size + ((n % size) > rank); 3703 } 3704 } else { 3705 nlocal = csize; 3706 } 3707 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3708 rstart = rend - nlocal; 3709 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3710 3711 /* next, compute all the lengths */ 3712 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3713 olens = dlens + m; 3714 for (i=0; i<m; i++) { 3715 jend = ii[i+1] - ii[i]; 3716 olen = 0; 3717 dlen = 0; 3718 for (j=0; j<jend; j++) { 3719 if (*jj < rstart || *jj >= rend) olen++; 3720 else dlen++; 3721 jj++; 3722 } 3723 olens[i] = olen; 3724 dlens[i] = dlen; 3725 } 3726 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3727 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3728 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3729 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3730 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3731 ierr = PetscFree(dlens);CHKERRQ(ierr); 3732 } else { 3733 PetscInt ml,nl; 3734 3735 M = *newmat; 3736 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3737 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3738 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3739 /* 3740 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3741 rather than the slower MatSetValues(). 3742 */ 3743 M->was_assembled = PETSC_TRUE; 3744 M->assembled = PETSC_FALSE; 3745 } 3746 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3747 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3748 ii = aij->i; 3749 jj = aij->j; 3750 aa = aij->a; 3751 for (i=0; i<m; i++) { 3752 row = rstart + i; 3753 nz = ii[i+1] - ii[i]; 3754 cwork = jj; jj += nz; 3755 vwork = aa; aa += nz; 3756 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3757 } 3758 3759 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3760 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3761 *newmat = M; 3762 3763 /* save submatrix used in processor for next request */ 3764 if (call == MAT_INITIAL_MATRIX) { 3765 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3766 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3767 } 3768 PetscFunctionReturn(0); 3769 } 3770 3771 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3772 { 3773 PetscInt m,cstart, cend,j,nnz,i,d; 3774 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3775 const PetscInt *JJ; 3776 PetscScalar *values; 3777 PetscErrorCode ierr; 3778 PetscBool nooffprocentries; 3779 3780 PetscFunctionBegin; 3781 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3782 3783 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3784 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3785 m = B->rmap->n; 3786 cstart = B->cmap->rstart; 3787 cend = B->cmap->rend; 3788 rstart = B->rmap->rstart; 3789 3790 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3791 3792 #if defined(PETSC_USE_DEBUGGING) 3793 for (i=0; i<m; i++) { 3794 nnz = Ii[i+1]- Ii[i]; 3795 JJ = J + Ii[i]; 3796 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3797 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3798 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3799 } 3800 #endif 3801 3802 for (i=0; i<m; i++) { 3803 nnz = Ii[i+1]- Ii[i]; 3804 JJ = J + Ii[i]; 3805 nnz_max = PetscMax(nnz_max,nnz); 3806 d = 0; 3807 for (j=0; j<nnz; j++) { 3808 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3809 } 3810 d_nnz[i] = d; 3811 o_nnz[i] = nnz - d; 3812 } 3813 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3814 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3815 3816 if (v) values = (PetscScalar*)v; 3817 else { 3818 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3819 } 3820 3821 for (i=0; i<m; i++) { 3822 ii = i + rstart; 3823 nnz = Ii[i+1]- Ii[i]; 3824 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3825 } 3826 nooffprocentries = B->nooffprocentries; 3827 B->nooffprocentries = PETSC_TRUE; 3828 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3829 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3830 B->nooffprocentries = nooffprocentries; 3831 3832 if (!v) { 3833 ierr = PetscFree(values);CHKERRQ(ierr); 3834 } 3835 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3836 PetscFunctionReturn(0); 3837 } 3838 3839 /*@ 3840 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3841 (the default parallel PETSc format). 3842 3843 Collective on MPI_Comm 3844 3845 Input Parameters: 3846 + B - the matrix 3847 . i - the indices into j for the start of each local row (starts with zero) 3848 . j - the column indices for each local row (starts with zero) 3849 - v - optional values in the matrix 3850 3851 Level: developer 3852 3853 Notes: 3854 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3855 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3856 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3857 3858 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3859 3860 The format which is used for the sparse matrix input, is equivalent to a 3861 row-major ordering.. i.e for the following matrix, the input data expected is 3862 as shown 3863 3864 $ 1 0 0 3865 $ 2 0 3 P0 3866 $ ------- 3867 $ 4 5 6 P1 3868 $ 3869 $ Process0 [P0]: rows_owned=[0,1] 3870 $ i = {0,1,3} [size = nrow+1 = 2+1] 3871 $ j = {0,0,2} [size = 3] 3872 $ v = {1,2,3} [size = 3] 3873 $ 3874 $ Process1 [P1]: rows_owned=[2] 3875 $ i = {0,3} [size = nrow+1 = 1+1] 3876 $ j = {0,1,2} [size = 3] 3877 $ v = {4,5,6} [size = 3] 3878 3879 .keywords: matrix, aij, compressed row, sparse, parallel 3880 3881 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3882 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3883 @*/ 3884 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3885 { 3886 PetscErrorCode ierr; 3887 3888 PetscFunctionBegin; 3889 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3890 PetscFunctionReturn(0); 3891 } 3892 3893 /*@C 3894 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3895 (the default parallel PETSc format). For good matrix assembly performance 3896 the user should preallocate the matrix storage by setting the parameters 3897 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3898 performance can be increased by more than a factor of 50. 3899 3900 Collective on MPI_Comm 3901 3902 Input Parameters: 3903 + B - the matrix 3904 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3905 (same value is used for all local rows) 3906 . d_nnz - array containing the number of nonzeros in the various rows of the 3907 DIAGONAL portion of the local submatrix (possibly different for each row) 3908 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3909 The size of this array is equal to the number of local rows, i.e 'm'. 3910 For matrices that will be factored, you must leave room for (and set) 3911 the diagonal entry even if it is zero. 3912 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3913 submatrix (same value is used for all local rows). 3914 - o_nnz - array containing the number of nonzeros in the various rows of the 3915 OFF-DIAGONAL portion of the local submatrix (possibly different for 3916 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3917 structure. The size of this array is equal to the number 3918 of local rows, i.e 'm'. 3919 3920 If the *_nnz parameter is given then the *_nz parameter is ignored 3921 3922 The AIJ format (also called the Yale sparse matrix format or 3923 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3924 storage. The stored row and column indices begin with zero. 3925 See Users-Manual: ch_mat for details. 3926 3927 The parallel matrix is partitioned such that the first m0 rows belong to 3928 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3929 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3930 3931 The DIAGONAL portion of the local submatrix of a processor can be defined 3932 as the submatrix which is obtained by extraction the part corresponding to 3933 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3934 first row that belongs to the processor, r2 is the last row belonging to 3935 the this processor, and c1-c2 is range of indices of the local part of a 3936 vector suitable for applying the matrix to. This is an mxn matrix. In the 3937 common case of a square matrix, the row and column ranges are the same and 3938 the DIAGONAL part is also square. The remaining portion of the local 3939 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3940 3941 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3942 3943 You can call MatGetInfo() to get information on how effective the preallocation was; 3944 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3945 You can also run with the option -info and look for messages with the string 3946 malloc in them to see if additional memory allocation was needed. 3947 3948 Example usage: 3949 3950 Consider the following 8x8 matrix with 34 non-zero values, that is 3951 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3952 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3953 as follows: 3954 3955 .vb 3956 1 2 0 | 0 3 0 | 0 4 3957 Proc0 0 5 6 | 7 0 0 | 8 0 3958 9 0 10 | 11 0 0 | 12 0 3959 ------------------------------------- 3960 13 0 14 | 15 16 17 | 0 0 3961 Proc1 0 18 0 | 19 20 21 | 0 0 3962 0 0 0 | 22 23 0 | 24 0 3963 ------------------------------------- 3964 Proc2 25 26 27 | 0 0 28 | 29 0 3965 30 0 0 | 31 32 33 | 0 34 3966 .ve 3967 3968 This can be represented as a collection of submatrices as: 3969 3970 .vb 3971 A B C 3972 D E F 3973 G H I 3974 .ve 3975 3976 Where the submatrices A,B,C are owned by proc0, D,E,F are 3977 owned by proc1, G,H,I are owned by proc2. 3978 3979 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3980 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3981 The 'M','N' parameters are 8,8, and have the same values on all procs. 3982 3983 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3984 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3985 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3986 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3987 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3988 matrix, ans [DF] as another SeqAIJ matrix. 3989 3990 When d_nz, o_nz parameters are specified, d_nz storage elements are 3991 allocated for every row of the local diagonal submatrix, and o_nz 3992 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3993 One way to choose d_nz and o_nz is to use the max nonzerors per local 3994 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3995 In this case, the values of d_nz,o_nz are: 3996 .vb 3997 proc0 : dnz = 2, o_nz = 2 3998 proc1 : dnz = 3, o_nz = 2 3999 proc2 : dnz = 1, o_nz = 4 4000 .ve 4001 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4002 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4003 for proc3. i.e we are using 12+15+10=37 storage locations to store 4004 34 values. 4005 4006 When d_nnz, o_nnz parameters are specified, the storage is specified 4007 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4008 In the above case the values for d_nnz,o_nnz are: 4009 .vb 4010 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4011 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4012 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4013 .ve 4014 Here the space allocated is sum of all the above values i.e 34, and 4015 hence pre-allocation is perfect. 4016 4017 Level: intermediate 4018 4019 .keywords: matrix, aij, compressed row, sparse, parallel 4020 4021 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4022 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4023 @*/ 4024 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4025 { 4026 PetscErrorCode ierr; 4027 4028 PetscFunctionBegin; 4029 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4030 PetscValidType(B,1); 4031 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4032 PetscFunctionReturn(0); 4033 } 4034 4035 /*@ 4036 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4037 CSR format the local rows. 4038 4039 Collective on MPI_Comm 4040 4041 Input Parameters: 4042 + comm - MPI communicator 4043 . m - number of local rows (Cannot be PETSC_DECIDE) 4044 . n - This value should be the same as the local size used in creating the 4045 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4046 calculated if N is given) For square matrices n is almost always m. 4047 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4048 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4049 . i - row indices 4050 . j - column indices 4051 - a - matrix values 4052 4053 Output Parameter: 4054 . mat - the matrix 4055 4056 Level: intermediate 4057 4058 Notes: 4059 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4060 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4061 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4062 4063 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4064 4065 The format which is used for the sparse matrix input, is equivalent to a 4066 row-major ordering.. i.e for the following matrix, the input data expected is 4067 as shown 4068 4069 $ 1 0 0 4070 $ 2 0 3 P0 4071 $ ------- 4072 $ 4 5 6 P1 4073 $ 4074 $ Process0 [P0]: rows_owned=[0,1] 4075 $ i = {0,1,3} [size = nrow+1 = 2+1] 4076 $ j = {0,0,2} [size = 3] 4077 $ v = {1,2,3} [size = 3] 4078 $ 4079 $ Process1 [P1]: rows_owned=[2] 4080 $ i = {0,3} [size = nrow+1 = 1+1] 4081 $ j = {0,1,2} [size = 3] 4082 $ v = {4,5,6} [size = 3] 4083 4084 .keywords: matrix, aij, compressed row, sparse, parallel 4085 4086 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4087 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4088 @*/ 4089 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4090 { 4091 PetscErrorCode ierr; 4092 4093 PetscFunctionBegin; 4094 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4095 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4096 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4097 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4098 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4099 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4100 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4101 PetscFunctionReturn(0); 4102 } 4103 4104 /*@C 4105 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4106 (the default parallel PETSc format). For good matrix assembly performance 4107 the user should preallocate the matrix storage by setting the parameters 4108 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4109 performance can be increased by more than a factor of 50. 4110 4111 Collective on MPI_Comm 4112 4113 Input Parameters: 4114 + comm - MPI communicator 4115 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4116 This value should be the same as the local size used in creating the 4117 y vector for the matrix-vector product y = Ax. 4118 . n - This value should be the same as the local size used in creating the 4119 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4120 calculated if N is given) For square matrices n is almost always m. 4121 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4122 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4123 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4124 (same value is used for all local rows) 4125 . d_nnz - array containing the number of nonzeros in the various rows of the 4126 DIAGONAL portion of the local submatrix (possibly different for each row) 4127 or NULL, if d_nz is used to specify the nonzero structure. 4128 The size of this array is equal to the number of local rows, i.e 'm'. 4129 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4130 submatrix (same value is used for all local rows). 4131 - o_nnz - array containing the number of nonzeros in the various rows of the 4132 OFF-DIAGONAL portion of the local submatrix (possibly different for 4133 each row) or NULL, if o_nz is used to specify the nonzero 4134 structure. The size of this array is equal to the number 4135 of local rows, i.e 'm'. 4136 4137 Output Parameter: 4138 . A - the matrix 4139 4140 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4141 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4142 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4143 4144 Notes: 4145 If the *_nnz parameter is given then the *_nz parameter is ignored 4146 4147 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4148 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4149 storage requirements for this matrix. 4150 4151 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4152 processor than it must be used on all processors that share the object for 4153 that argument. 4154 4155 The user MUST specify either the local or global matrix dimensions 4156 (possibly both). 4157 4158 The parallel matrix is partitioned across processors such that the 4159 first m0 rows belong to process 0, the next m1 rows belong to 4160 process 1, the next m2 rows belong to process 2 etc.. where 4161 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4162 values corresponding to [m x N] submatrix. 4163 4164 The columns are logically partitioned with the n0 columns belonging 4165 to 0th partition, the next n1 columns belonging to the next 4166 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4167 4168 The DIAGONAL portion of the local submatrix on any given processor 4169 is the submatrix corresponding to the rows and columns m,n 4170 corresponding to the given processor. i.e diagonal matrix on 4171 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4172 etc. The remaining portion of the local submatrix [m x (N-n)] 4173 constitute the OFF-DIAGONAL portion. The example below better 4174 illustrates this concept. 4175 4176 For a square global matrix we define each processor's diagonal portion 4177 to be its local rows and the corresponding columns (a square submatrix); 4178 each processor's off-diagonal portion encompasses the remainder of the 4179 local matrix (a rectangular submatrix). 4180 4181 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4182 4183 When calling this routine with a single process communicator, a matrix of 4184 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4185 type of communicator, use the construction mechanism 4186 .vb 4187 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4188 .ve 4189 4190 $ MatCreate(...,&A); 4191 $ MatSetType(A,MATMPIAIJ); 4192 $ MatSetSizes(A, m,n,M,N); 4193 $ MatMPIAIJSetPreallocation(A,...); 4194 4195 By default, this format uses inodes (identical nodes) when possible. 4196 We search for consecutive rows with the same nonzero structure, thereby 4197 reusing matrix information to achieve increased efficiency. 4198 4199 Options Database Keys: 4200 + -mat_no_inode - Do not use inodes 4201 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4202 - -mat_aij_oneindex - Internally use indexing starting at 1 4203 rather than 0. Note that when calling MatSetValues(), 4204 the user still MUST index entries starting at 0! 4205 4206 4207 Example usage: 4208 4209 Consider the following 8x8 matrix with 34 non-zero values, that is 4210 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4211 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4212 as follows 4213 4214 .vb 4215 1 2 0 | 0 3 0 | 0 4 4216 Proc0 0 5 6 | 7 0 0 | 8 0 4217 9 0 10 | 11 0 0 | 12 0 4218 ------------------------------------- 4219 13 0 14 | 15 16 17 | 0 0 4220 Proc1 0 18 0 | 19 20 21 | 0 0 4221 0 0 0 | 22 23 0 | 24 0 4222 ------------------------------------- 4223 Proc2 25 26 27 | 0 0 28 | 29 0 4224 30 0 0 | 31 32 33 | 0 34 4225 .ve 4226 4227 This can be represented as a collection of submatrices as 4228 4229 .vb 4230 A B C 4231 D E F 4232 G H I 4233 .ve 4234 4235 Where the submatrices A,B,C are owned by proc0, D,E,F are 4236 owned by proc1, G,H,I are owned by proc2. 4237 4238 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4239 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4240 The 'M','N' parameters are 8,8, and have the same values on all procs. 4241 4242 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4243 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4244 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4245 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4246 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4247 matrix, ans [DF] as another SeqAIJ matrix. 4248 4249 When d_nz, o_nz parameters are specified, d_nz storage elements are 4250 allocated for every row of the local diagonal submatrix, and o_nz 4251 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4252 One way to choose d_nz and o_nz is to use the max nonzerors per local 4253 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4254 In this case, the values of d_nz,o_nz are 4255 .vb 4256 proc0 : dnz = 2, o_nz = 2 4257 proc1 : dnz = 3, o_nz = 2 4258 proc2 : dnz = 1, o_nz = 4 4259 .ve 4260 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4261 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4262 for proc3. i.e we are using 12+15+10=37 storage locations to store 4263 34 values. 4264 4265 When d_nnz, o_nnz parameters are specified, the storage is specified 4266 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4267 In the above case the values for d_nnz,o_nnz are 4268 .vb 4269 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4270 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4271 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4272 .ve 4273 Here the space allocated is sum of all the above values i.e 34, and 4274 hence pre-allocation is perfect. 4275 4276 Level: intermediate 4277 4278 .keywords: matrix, aij, compressed row, sparse, parallel 4279 4280 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4281 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4282 @*/ 4283 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4284 { 4285 PetscErrorCode ierr; 4286 PetscMPIInt size; 4287 4288 PetscFunctionBegin; 4289 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4290 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4291 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4292 if (size > 1) { 4293 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4294 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4295 } else { 4296 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4297 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4298 } 4299 PetscFunctionReturn(0); 4300 } 4301 4302 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4303 { 4304 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4305 PetscBool flg; 4306 PetscErrorCode ierr; 4307 4308 PetscFunctionBegin; 4309 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4310 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4311 if (Ad) *Ad = a->A; 4312 if (Ao) *Ao = a->B; 4313 if (colmap) *colmap = a->garray; 4314 PetscFunctionReturn(0); 4315 } 4316 4317 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4318 { 4319 PetscErrorCode ierr; 4320 PetscInt m,N,i,rstart,nnz,Ii; 4321 PetscInt *indx; 4322 PetscScalar *values; 4323 4324 PetscFunctionBegin; 4325 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4326 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4327 PetscInt *dnz,*onz,sum,bs,cbs; 4328 4329 if (n == PETSC_DECIDE) { 4330 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4331 } 4332 /* Check sum(n) = N */ 4333 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4334 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4335 4336 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4337 rstart -= m; 4338 4339 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4340 for (i=0; i<m; i++) { 4341 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4342 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4343 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4344 } 4345 4346 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4347 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4348 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4349 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4350 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4351 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4352 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4353 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4354 } 4355 4356 /* numeric phase */ 4357 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4358 for (i=0; i<m; i++) { 4359 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4360 Ii = i + rstart; 4361 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4362 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4363 } 4364 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4365 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4366 PetscFunctionReturn(0); 4367 } 4368 4369 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4370 { 4371 PetscErrorCode ierr; 4372 PetscMPIInt rank; 4373 PetscInt m,N,i,rstart,nnz; 4374 size_t len; 4375 const PetscInt *indx; 4376 PetscViewer out; 4377 char *name; 4378 Mat B; 4379 const PetscScalar *values; 4380 4381 PetscFunctionBegin; 4382 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4383 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4384 /* Should this be the type of the diagonal block of A? */ 4385 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4386 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4387 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4388 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4389 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4390 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4391 for (i=0; i<m; i++) { 4392 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4393 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4394 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4395 } 4396 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4397 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4398 4399 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4400 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4401 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4402 sprintf(name,"%s.%d",outfile,rank); 4403 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4404 ierr = PetscFree(name);CHKERRQ(ierr); 4405 ierr = MatView(B,out);CHKERRQ(ierr); 4406 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4407 ierr = MatDestroy(&B);CHKERRQ(ierr); 4408 PetscFunctionReturn(0); 4409 } 4410 4411 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4412 { 4413 PetscErrorCode ierr; 4414 Mat_Merge_SeqsToMPI *merge; 4415 PetscContainer container; 4416 4417 PetscFunctionBegin; 4418 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4419 if (container) { 4420 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4421 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4423 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4424 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4425 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4426 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4427 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4428 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4429 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4430 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4431 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4432 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4433 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4434 ierr = PetscFree(merge);CHKERRQ(ierr); 4435 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4436 } 4437 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4438 PetscFunctionReturn(0); 4439 } 4440 4441 #include <../src/mat/utils/freespace.h> 4442 #include <petscbt.h> 4443 4444 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4445 { 4446 PetscErrorCode ierr; 4447 MPI_Comm comm; 4448 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4449 PetscMPIInt size,rank,taga,*len_s; 4450 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4451 PetscInt proc,m; 4452 PetscInt **buf_ri,**buf_rj; 4453 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4454 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4455 MPI_Request *s_waits,*r_waits; 4456 MPI_Status *status; 4457 MatScalar *aa=a->a; 4458 MatScalar **abuf_r,*ba_i; 4459 Mat_Merge_SeqsToMPI *merge; 4460 PetscContainer container; 4461 4462 PetscFunctionBegin; 4463 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4464 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4465 4466 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4467 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4468 4469 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4470 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4471 4472 bi = merge->bi; 4473 bj = merge->bj; 4474 buf_ri = merge->buf_ri; 4475 buf_rj = merge->buf_rj; 4476 4477 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4478 owners = merge->rowmap->range; 4479 len_s = merge->len_s; 4480 4481 /* send and recv matrix values */ 4482 /*-----------------------------*/ 4483 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4484 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4485 4486 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4487 for (proc=0,k=0; proc<size; proc++) { 4488 if (!len_s[proc]) continue; 4489 i = owners[proc]; 4490 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4491 k++; 4492 } 4493 4494 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4495 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4496 ierr = PetscFree(status);CHKERRQ(ierr); 4497 4498 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4499 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4500 4501 /* insert mat values of mpimat */ 4502 /*----------------------------*/ 4503 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4504 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4505 4506 for (k=0; k<merge->nrecv; k++) { 4507 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4508 nrows = *(buf_ri_k[k]); 4509 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4510 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4511 } 4512 4513 /* set values of ba */ 4514 m = merge->rowmap->n; 4515 for (i=0; i<m; i++) { 4516 arow = owners[rank] + i; 4517 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4518 bnzi = bi[i+1] - bi[i]; 4519 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4520 4521 /* add local non-zero vals of this proc's seqmat into ba */ 4522 anzi = ai[arow+1] - ai[arow]; 4523 aj = a->j + ai[arow]; 4524 aa = a->a + ai[arow]; 4525 nextaj = 0; 4526 for (j=0; nextaj<anzi; j++) { 4527 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4528 ba_i[j] += aa[nextaj++]; 4529 } 4530 } 4531 4532 /* add received vals into ba */ 4533 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4534 /* i-th row */ 4535 if (i == *nextrow[k]) { 4536 anzi = *(nextai[k]+1) - *nextai[k]; 4537 aj = buf_rj[k] + *(nextai[k]); 4538 aa = abuf_r[k] + *(nextai[k]); 4539 nextaj = 0; 4540 for (j=0; nextaj<anzi; j++) { 4541 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4542 ba_i[j] += aa[nextaj++]; 4543 } 4544 } 4545 nextrow[k]++; nextai[k]++; 4546 } 4547 } 4548 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4549 } 4550 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4551 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4552 4553 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4554 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4555 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4556 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4557 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4558 PetscFunctionReturn(0); 4559 } 4560 4561 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4562 { 4563 PetscErrorCode ierr; 4564 Mat B_mpi; 4565 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4566 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4567 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4568 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4569 PetscInt len,proc,*dnz,*onz,bs,cbs; 4570 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4571 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4572 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4573 MPI_Status *status; 4574 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4575 PetscBT lnkbt; 4576 Mat_Merge_SeqsToMPI *merge; 4577 PetscContainer container; 4578 4579 PetscFunctionBegin; 4580 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4581 4582 /* make sure it is a PETSc comm */ 4583 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4584 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4585 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4586 4587 ierr = PetscNew(&merge);CHKERRQ(ierr); 4588 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4589 4590 /* determine row ownership */ 4591 /*---------------------------------------------------------*/ 4592 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4593 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4594 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4595 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4596 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4597 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4598 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4599 4600 m = merge->rowmap->n; 4601 owners = merge->rowmap->range; 4602 4603 /* determine the number of messages to send, their lengths */ 4604 /*---------------------------------------------------------*/ 4605 len_s = merge->len_s; 4606 4607 len = 0; /* length of buf_si[] */ 4608 merge->nsend = 0; 4609 for (proc=0; proc<size; proc++) { 4610 len_si[proc] = 0; 4611 if (proc == rank) { 4612 len_s[proc] = 0; 4613 } else { 4614 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4615 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4616 } 4617 if (len_s[proc]) { 4618 merge->nsend++; 4619 nrows = 0; 4620 for (i=owners[proc]; i<owners[proc+1]; i++) { 4621 if (ai[i+1] > ai[i]) nrows++; 4622 } 4623 len_si[proc] = 2*(nrows+1); 4624 len += len_si[proc]; 4625 } 4626 } 4627 4628 /* determine the number and length of messages to receive for ij-structure */ 4629 /*-------------------------------------------------------------------------*/ 4630 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4631 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4632 4633 /* post the Irecv of j-structure */ 4634 /*-------------------------------*/ 4635 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4636 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4637 4638 /* post the Isend of j-structure */ 4639 /*--------------------------------*/ 4640 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4641 4642 for (proc=0, k=0; proc<size; proc++) { 4643 if (!len_s[proc]) continue; 4644 i = owners[proc]; 4645 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4646 k++; 4647 } 4648 4649 /* receives and sends of j-structure are complete */ 4650 /*------------------------------------------------*/ 4651 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4652 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4653 4654 /* send and recv i-structure */ 4655 /*---------------------------*/ 4656 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4657 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4658 4659 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4660 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4661 for (proc=0,k=0; proc<size; proc++) { 4662 if (!len_s[proc]) continue; 4663 /* form outgoing message for i-structure: 4664 buf_si[0]: nrows to be sent 4665 [1:nrows]: row index (global) 4666 [nrows+1:2*nrows+1]: i-structure index 4667 */ 4668 /*-------------------------------------------*/ 4669 nrows = len_si[proc]/2 - 1; 4670 buf_si_i = buf_si + nrows+1; 4671 buf_si[0] = nrows; 4672 buf_si_i[0] = 0; 4673 nrows = 0; 4674 for (i=owners[proc]; i<owners[proc+1]; i++) { 4675 anzi = ai[i+1] - ai[i]; 4676 if (anzi) { 4677 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4678 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4679 nrows++; 4680 } 4681 } 4682 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4683 k++; 4684 buf_si += len_si[proc]; 4685 } 4686 4687 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4688 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4689 4690 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4691 for (i=0; i<merge->nrecv; i++) { 4692 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4693 } 4694 4695 ierr = PetscFree(len_si);CHKERRQ(ierr); 4696 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4697 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4698 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4699 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4700 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4701 ierr = PetscFree(status);CHKERRQ(ierr); 4702 4703 /* compute a local seq matrix in each processor */ 4704 /*----------------------------------------------*/ 4705 /* allocate bi array and free space for accumulating nonzero column info */ 4706 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4707 bi[0] = 0; 4708 4709 /* create and initialize a linked list */ 4710 nlnk = N+1; 4711 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4712 4713 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4714 len = ai[owners[rank+1]] - ai[owners[rank]]; 4715 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4716 4717 current_space = free_space; 4718 4719 /* determine symbolic info for each local row */ 4720 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4721 4722 for (k=0; k<merge->nrecv; k++) { 4723 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4724 nrows = *buf_ri_k[k]; 4725 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4726 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4727 } 4728 4729 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4730 len = 0; 4731 for (i=0; i<m; i++) { 4732 bnzi = 0; 4733 /* add local non-zero cols of this proc's seqmat into lnk */ 4734 arow = owners[rank] + i; 4735 anzi = ai[arow+1] - ai[arow]; 4736 aj = a->j + ai[arow]; 4737 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4738 bnzi += nlnk; 4739 /* add received col data into lnk */ 4740 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4741 if (i == *nextrow[k]) { /* i-th row */ 4742 anzi = *(nextai[k]+1) - *nextai[k]; 4743 aj = buf_rj[k] + *nextai[k]; 4744 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4745 bnzi += nlnk; 4746 nextrow[k]++; nextai[k]++; 4747 } 4748 } 4749 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4750 4751 /* if free space is not available, make more free space */ 4752 if (current_space->local_remaining<bnzi) { 4753 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4754 nspacedouble++; 4755 } 4756 /* copy data into free space, then initialize lnk */ 4757 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4758 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4759 4760 current_space->array += bnzi; 4761 current_space->local_used += bnzi; 4762 current_space->local_remaining -= bnzi; 4763 4764 bi[i+1] = bi[i] + bnzi; 4765 } 4766 4767 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4768 4769 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4770 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4771 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4772 4773 /* create symbolic parallel matrix B_mpi */ 4774 /*---------------------------------------*/ 4775 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4776 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4777 if (n==PETSC_DECIDE) { 4778 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4779 } else { 4780 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4781 } 4782 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4783 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4784 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4785 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4786 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4787 4788 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4789 B_mpi->assembled = PETSC_FALSE; 4790 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4791 merge->bi = bi; 4792 merge->bj = bj; 4793 merge->buf_ri = buf_ri; 4794 merge->buf_rj = buf_rj; 4795 merge->coi = NULL; 4796 merge->coj = NULL; 4797 merge->owners_co = NULL; 4798 4799 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4800 4801 /* attach the supporting struct to B_mpi for reuse */ 4802 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4803 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4804 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4805 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4806 *mpimat = B_mpi; 4807 4808 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4809 PetscFunctionReturn(0); 4810 } 4811 4812 /*@C 4813 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4814 matrices from each processor 4815 4816 Collective on MPI_Comm 4817 4818 Input Parameters: 4819 + comm - the communicators the parallel matrix will live on 4820 . seqmat - the input sequential matrices 4821 . m - number of local rows (or PETSC_DECIDE) 4822 . n - number of local columns (or PETSC_DECIDE) 4823 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4824 4825 Output Parameter: 4826 . mpimat - the parallel matrix generated 4827 4828 Level: advanced 4829 4830 Notes: 4831 The dimensions of the sequential matrix in each processor MUST be the same. 4832 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4833 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4834 @*/ 4835 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4836 { 4837 PetscErrorCode ierr; 4838 PetscMPIInt size; 4839 4840 PetscFunctionBegin; 4841 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4842 if (size == 1) { 4843 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4844 if (scall == MAT_INITIAL_MATRIX) { 4845 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4846 } else { 4847 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4848 } 4849 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4850 PetscFunctionReturn(0); 4851 } 4852 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4853 if (scall == MAT_INITIAL_MATRIX) { 4854 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4855 } 4856 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4857 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4858 PetscFunctionReturn(0); 4859 } 4860 4861 /*@ 4862 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4863 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4864 with MatGetSize() 4865 4866 Not Collective 4867 4868 Input Parameters: 4869 + A - the matrix 4870 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4871 4872 Output Parameter: 4873 . A_loc - the local sequential matrix generated 4874 4875 Level: developer 4876 4877 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4878 4879 @*/ 4880 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4881 { 4882 PetscErrorCode ierr; 4883 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4884 Mat_SeqAIJ *mat,*a,*b; 4885 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4886 MatScalar *aa,*ba,*cam; 4887 PetscScalar *ca; 4888 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4889 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4890 PetscBool match; 4891 MPI_Comm comm; 4892 PetscMPIInt size; 4893 4894 PetscFunctionBegin; 4895 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4896 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4897 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4898 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4899 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4900 4901 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4902 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4903 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4904 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4905 aa = a->a; ba = b->a; 4906 if (scall == MAT_INITIAL_MATRIX) { 4907 if (size == 1) { 4908 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4909 PetscFunctionReturn(0); 4910 } 4911 4912 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4913 ci[0] = 0; 4914 for (i=0; i<am; i++) { 4915 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4916 } 4917 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4918 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4919 k = 0; 4920 for (i=0; i<am; i++) { 4921 ncols_o = bi[i+1] - bi[i]; 4922 ncols_d = ai[i+1] - ai[i]; 4923 /* off-diagonal portion of A */ 4924 for (jo=0; jo<ncols_o; jo++) { 4925 col = cmap[*bj]; 4926 if (col >= cstart) break; 4927 cj[k] = col; bj++; 4928 ca[k++] = *ba++; 4929 } 4930 /* diagonal portion of A */ 4931 for (j=0; j<ncols_d; j++) { 4932 cj[k] = cstart + *aj++; 4933 ca[k++] = *aa++; 4934 } 4935 /* off-diagonal portion of A */ 4936 for (j=jo; j<ncols_o; j++) { 4937 cj[k] = cmap[*bj++]; 4938 ca[k++] = *ba++; 4939 } 4940 } 4941 /* put together the new matrix */ 4942 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4943 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4944 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4945 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4946 mat->free_a = PETSC_TRUE; 4947 mat->free_ij = PETSC_TRUE; 4948 mat->nonew = 0; 4949 } else if (scall == MAT_REUSE_MATRIX) { 4950 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4951 ci = mat->i; cj = mat->j; cam = mat->a; 4952 for (i=0; i<am; i++) { 4953 /* off-diagonal portion of A */ 4954 ncols_o = bi[i+1] - bi[i]; 4955 for (jo=0; jo<ncols_o; jo++) { 4956 col = cmap[*bj]; 4957 if (col >= cstart) break; 4958 *cam++ = *ba++; bj++; 4959 } 4960 /* diagonal portion of A */ 4961 ncols_d = ai[i+1] - ai[i]; 4962 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4963 /* off-diagonal portion of A */ 4964 for (j=jo; j<ncols_o; j++) { 4965 *cam++ = *ba++; bj++; 4966 } 4967 } 4968 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4969 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4970 PetscFunctionReturn(0); 4971 } 4972 4973 /*@C 4974 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4975 4976 Not Collective 4977 4978 Input Parameters: 4979 + A - the matrix 4980 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4981 - row, col - index sets of rows and columns to extract (or NULL) 4982 4983 Output Parameter: 4984 . A_loc - the local sequential matrix generated 4985 4986 Level: developer 4987 4988 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4989 4990 @*/ 4991 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4992 { 4993 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4994 PetscErrorCode ierr; 4995 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4996 IS isrowa,iscola; 4997 Mat *aloc; 4998 PetscBool match; 4999 5000 PetscFunctionBegin; 5001 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5002 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5003 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5004 if (!row) { 5005 start = A->rmap->rstart; end = A->rmap->rend; 5006 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5007 } else { 5008 isrowa = *row; 5009 } 5010 if (!col) { 5011 start = A->cmap->rstart; 5012 cmap = a->garray; 5013 nzA = a->A->cmap->n; 5014 nzB = a->B->cmap->n; 5015 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5016 ncols = 0; 5017 for (i=0; i<nzB; i++) { 5018 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5019 else break; 5020 } 5021 imark = i; 5022 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5023 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5024 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5025 } else { 5026 iscola = *col; 5027 } 5028 if (scall != MAT_INITIAL_MATRIX) { 5029 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5030 aloc[0] = *A_loc; 5031 } 5032 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5033 *A_loc = aloc[0]; 5034 ierr = PetscFree(aloc);CHKERRQ(ierr); 5035 if (!row) { 5036 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5037 } 5038 if (!col) { 5039 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5040 } 5041 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5042 PetscFunctionReturn(0); 5043 } 5044 5045 /*@C 5046 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5047 5048 Collective on Mat 5049 5050 Input Parameters: 5051 + A,B - the matrices in mpiaij format 5052 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5053 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5054 5055 Output Parameter: 5056 + rowb, colb - index sets of rows and columns of B to extract 5057 - B_seq - the sequential matrix generated 5058 5059 Level: developer 5060 5061 @*/ 5062 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5063 { 5064 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5065 PetscErrorCode ierr; 5066 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5067 IS isrowb,iscolb; 5068 Mat *bseq=NULL; 5069 5070 PetscFunctionBegin; 5071 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5072 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5073 } 5074 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5075 5076 if (scall == MAT_INITIAL_MATRIX) { 5077 start = A->cmap->rstart; 5078 cmap = a->garray; 5079 nzA = a->A->cmap->n; 5080 nzB = a->B->cmap->n; 5081 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5082 ncols = 0; 5083 for (i=0; i<nzB; i++) { /* row < local row index */ 5084 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5085 else break; 5086 } 5087 imark = i; 5088 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5089 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5090 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5091 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5092 } else { 5093 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5094 isrowb = *rowb; iscolb = *colb; 5095 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5096 bseq[0] = *B_seq; 5097 } 5098 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5099 *B_seq = bseq[0]; 5100 ierr = PetscFree(bseq);CHKERRQ(ierr); 5101 if (!rowb) { 5102 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5103 } else { 5104 *rowb = isrowb; 5105 } 5106 if (!colb) { 5107 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5108 } else { 5109 *colb = iscolb; 5110 } 5111 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5112 PetscFunctionReturn(0); 5113 } 5114 5115 /* 5116 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5117 of the OFF-DIAGONAL portion of local A 5118 5119 Collective on Mat 5120 5121 Input Parameters: 5122 + A,B - the matrices in mpiaij format 5123 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5124 5125 Output Parameter: 5126 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5127 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5128 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5129 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5130 5131 Level: developer 5132 5133 */ 5134 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5135 { 5136 VecScatter_MPI_General *gen_to,*gen_from; 5137 PetscErrorCode ierr; 5138 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5139 Mat_SeqAIJ *b_oth; 5140 VecScatter ctx; 5141 MPI_Comm comm; 5142 PetscMPIInt *rprocs,*sprocs,tag,rank; 5143 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5144 PetscInt *rvalues,*svalues; 5145 MatScalar *b_otha,*bufa,*bufA; 5146 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5147 MPI_Request *rwaits = NULL,*swaits = NULL; 5148 MPI_Status *sstatus,rstatus; 5149 PetscMPIInt jj,size; 5150 PetscInt *cols,sbs,rbs; 5151 PetscScalar *vals; 5152 5153 PetscFunctionBegin; 5154 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5155 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5156 5157 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5158 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5159 } 5160 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5161 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5162 5163 if (size == 1) { 5164 startsj_s = NULL; 5165 bufa_ptr = NULL; 5166 *B_oth = NULL; 5167 PetscFunctionReturn(0); 5168 } 5169 5170 ctx = a->Mvctx; 5171 if (a->Mvctx->mpi3 && !a->Mvctx_mpi1) { 5172 /* a->Mvctx is type of MPI3 which is not implemented for Mat-Mat ops, 5173 thus create a->Mvctx_mpi1 */ 5174 a->Mvctx_mpi1_flg = PETSC_TRUE; 5175 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5176 ctx = a->Mvctx_mpi1; 5177 } 5178 tag = ((PetscObject)ctx)->tag; 5179 5180 gen_to = (VecScatter_MPI_General*)ctx->todata; 5181 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5182 nrecvs = gen_from->n; 5183 nsends = gen_to->n; 5184 5185 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5186 srow = gen_to->indices; /* local row index to be sent */ 5187 sstarts = gen_to->starts; 5188 sprocs = gen_to->procs; 5189 sstatus = gen_to->sstatus; 5190 sbs = gen_to->bs; 5191 rstarts = gen_from->starts; 5192 rprocs = gen_from->procs; 5193 rbs = gen_from->bs; 5194 5195 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5196 if (scall == MAT_INITIAL_MATRIX) { 5197 /* i-array */ 5198 /*---------*/ 5199 /* post receives */ 5200 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5201 for (i=0; i<nrecvs; i++) { 5202 rowlen = rvalues + rstarts[i]*rbs; 5203 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5204 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5205 } 5206 5207 /* pack the outgoing message */ 5208 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5209 5210 sstartsj[0] = 0; 5211 rstartsj[0] = 0; 5212 len = 0; /* total length of j or a array to be sent */ 5213 k = 0; 5214 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5215 for (i=0; i<nsends; i++) { 5216 rowlen = svalues + sstarts[i]*sbs; 5217 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5218 for (j=0; j<nrows; j++) { 5219 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5220 for (l=0; l<sbs; l++) { 5221 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5222 5223 rowlen[j*sbs+l] = ncols; 5224 5225 len += ncols; 5226 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5227 } 5228 k++; 5229 } 5230 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5231 5232 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5233 } 5234 /* recvs and sends of i-array are completed */ 5235 i = nrecvs; 5236 while (i--) { 5237 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5238 } 5239 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5240 ierr = PetscFree(svalues);CHKERRQ(ierr); 5241 5242 /* allocate buffers for sending j and a arrays */ 5243 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5244 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5245 5246 /* create i-array of B_oth */ 5247 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5248 5249 b_othi[0] = 0; 5250 len = 0; /* total length of j or a array to be received */ 5251 k = 0; 5252 for (i=0; i<nrecvs; i++) { 5253 rowlen = rvalues + rstarts[i]*rbs; 5254 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5255 for (j=0; j<nrows; j++) { 5256 b_othi[k+1] = b_othi[k] + rowlen[j]; 5257 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5258 k++; 5259 } 5260 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5261 } 5262 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5263 5264 /* allocate space for j and a arrrays of B_oth */ 5265 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5266 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5267 5268 /* j-array */ 5269 /*---------*/ 5270 /* post receives of j-array */ 5271 for (i=0; i<nrecvs; i++) { 5272 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5273 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5274 } 5275 5276 /* pack the outgoing message j-array */ 5277 k = 0; 5278 for (i=0; i<nsends; i++) { 5279 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5280 bufJ = bufj+sstartsj[i]; 5281 for (j=0; j<nrows; j++) { 5282 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5283 for (ll=0; ll<sbs; ll++) { 5284 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5285 for (l=0; l<ncols; l++) { 5286 *bufJ++ = cols[l]; 5287 } 5288 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5289 } 5290 } 5291 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5292 } 5293 5294 /* recvs and sends of j-array are completed */ 5295 i = nrecvs; 5296 while (i--) { 5297 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5298 } 5299 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5300 } else if (scall == MAT_REUSE_MATRIX) { 5301 sstartsj = *startsj_s; 5302 rstartsj = *startsj_r; 5303 bufa = *bufa_ptr; 5304 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5305 b_otha = b_oth->a; 5306 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5307 5308 /* a-array */ 5309 /*---------*/ 5310 /* post receives of a-array */ 5311 for (i=0; i<nrecvs; i++) { 5312 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5313 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5314 } 5315 5316 /* pack the outgoing message a-array */ 5317 k = 0; 5318 for (i=0; i<nsends; i++) { 5319 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5320 bufA = bufa+sstartsj[i]; 5321 for (j=0; j<nrows; j++) { 5322 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5323 for (ll=0; ll<sbs; ll++) { 5324 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5325 for (l=0; l<ncols; l++) { 5326 *bufA++ = vals[l]; 5327 } 5328 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5329 } 5330 } 5331 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5332 } 5333 /* recvs and sends of a-array are completed */ 5334 i = nrecvs; 5335 while (i--) { 5336 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5337 } 5338 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5339 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5340 5341 if (scall == MAT_INITIAL_MATRIX) { 5342 /* put together the new matrix */ 5343 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5344 5345 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5346 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5347 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5348 b_oth->free_a = PETSC_TRUE; 5349 b_oth->free_ij = PETSC_TRUE; 5350 b_oth->nonew = 0; 5351 5352 ierr = PetscFree(bufj);CHKERRQ(ierr); 5353 if (!startsj_s || !bufa_ptr) { 5354 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5355 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5356 } else { 5357 *startsj_s = sstartsj; 5358 *startsj_r = rstartsj; 5359 *bufa_ptr = bufa; 5360 } 5361 } 5362 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5363 PetscFunctionReturn(0); 5364 } 5365 5366 /*@C 5367 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5368 5369 Not Collective 5370 5371 Input Parameters: 5372 . A - The matrix in mpiaij format 5373 5374 Output Parameter: 5375 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5376 . colmap - A map from global column index to local index into lvec 5377 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5378 5379 Level: developer 5380 5381 @*/ 5382 #if defined(PETSC_USE_CTABLE) 5383 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5384 #else 5385 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5386 #endif 5387 { 5388 Mat_MPIAIJ *a; 5389 5390 PetscFunctionBegin; 5391 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5392 PetscValidPointer(lvec, 2); 5393 PetscValidPointer(colmap, 3); 5394 PetscValidPointer(multScatter, 4); 5395 a = (Mat_MPIAIJ*) A->data; 5396 if (lvec) *lvec = a->lvec; 5397 if (colmap) *colmap = a->colmap; 5398 if (multScatter) *multScatter = a->Mvctx; 5399 PetscFunctionReturn(0); 5400 } 5401 5402 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5403 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5404 #if defined(PETSC_HAVE_MKL_SPARSE) 5405 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5406 #endif 5407 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5408 #if defined(PETSC_HAVE_ELEMENTAL) 5409 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5410 #endif 5411 #if defined(PETSC_HAVE_HYPRE) 5412 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5413 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5414 #endif 5415 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5416 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5417 5418 /* 5419 Computes (B'*A')' since computing B*A directly is untenable 5420 5421 n p p 5422 ( ) ( ) ( ) 5423 m ( A ) * n ( B ) = m ( C ) 5424 ( ) ( ) ( ) 5425 5426 */ 5427 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5428 { 5429 PetscErrorCode ierr; 5430 Mat At,Bt,Ct; 5431 5432 PetscFunctionBegin; 5433 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5434 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5435 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5436 ierr = MatDestroy(&At);CHKERRQ(ierr); 5437 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5438 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5439 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5440 PetscFunctionReturn(0); 5441 } 5442 5443 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5444 { 5445 PetscErrorCode ierr; 5446 PetscInt m=A->rmap->n,n=B->cmap->n; 5447 Mat Cmat; 5448 5449 PetscFunctionBegin; 5450 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5451 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5452 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5453 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5454 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5455 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5456 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5457 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5458 5459 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5460 5461 *C = Cmat; 5462 PetscFunctionReturn(0); 5463 } 5464 5465 /* ----------------------------------------------------------------*/ 5466 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5467 { 5468 PetscErrorCode ierr; 5469 5470 PetscFunctionBegin; 5471 if (scall == MAT_INITIAL_MATRIX) { 5472 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5473 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5474 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5475 } 5476 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5477 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5478 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5479 PetscFunctionReturn(0); 5480 } 5481 5482 /*MC 5483 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5484 5485 Options Database Keys: 5486 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5487 5488 Level: beginner 5489 5490 .seealso: MatCreateAIJ() 5491 M*/ 5492 5493 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5494 { 5495 Mat_MPIAIJ *b; 5496 PetscErrorCode ierr; 5497 PetscMPIInt size; 5498 5499 PetscFunctionBegin; 5500 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5501 5502 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5503 B->data = (void*)b; 5504 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5505 B->assembled = PETSC_FALSE; 5506 B->insertmode = NOT_SET_VALUES; 5507 b->size = size; 5508 5509 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5510 5511 /* build cache for off array entries formed */ 5512 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5513 5514 b->donotstash = PETSC_FALSE; 5515 b->colmap = 0; 5516 b->garray = 0; 5517 b->roworiented = PETSC_TRUE; 5518 5519 /* stuff used for matrix vector multiply */ 5520 b->lvec = NULL; 5521 b->Mvctx = NULL; 5522 5523 /* stuff for MatGetRow() */ 5524 b->rowindices = 0; 5525 b->rowvalues = 0; 5526 b->getrowactive = PETSC_FALSE; 5527 5528 /* flexible pointer used in CUSP/CUSPARSE classes */ 5529 b->spptr = NULL; 5530 5531 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5532 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5533 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5534 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5535 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5536 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5537 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5538 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5539 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5540 #if defined(PETSC_HAVE_MKL_SPARSE) 5541 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5542 #endif 5543 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5544 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5545 #if defined(PETSC_HAVE_ELEMENTAL) 5546 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5547 #endif 5548 #if defined(PETSC_HAVE_HYPRE) 5549 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5550 #endif 5551 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5552 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5553 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5554 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5555 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5556 #if defined(PETSC_HAVE_HYPRE) 5557 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5558 #endif 5559 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5560 PetscFunctionReturn(0); 5561 } 5562 5563 /*@C 5564 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5565 and "off-diagonal" part of the matrix in CSR format. 5566 5567 Collective on MPI_Comm 5568 5569 Input Parameters: 5570 + comm - MPI communicator 5571 . m - number of local rows (Cannot be PETSC_DECIDE) 5572 . n - This value should be the same as the local size used in creating the 5573 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5574 calculated if N is given) For square matrices n is almost always m. 5575 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5576 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5577 . i - row indices for "diagonal" portion of matrix 5578 . j - column indices 5579 . a - matrix values 5580 . oi - row indices for "off-diagonal" portion of matrix 5581 . oj - column indices 5582 - oa - matrix values 5583 5584 Output Parameter: 5585 . mat - the matrix 5586 5587 Level: advanced 5588 5589 Notes: 5590 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5591 must free the arrays once the matrix has been destroyed and not before. 5592 5593 The i and j indices are 0 based 5594 5595 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5596 5597 This sets local rows and cannot be used to set off-processor values. 5598 5599 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5600 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5601 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5602 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5603 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5604 communication if it is known that only local entries will be set. 5605 5606 .keywords: matrix, aij, compressed row, sparse, parallel 5607 5608 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5609 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5610 @*/ 5611 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5612 { 5613 PetscErrorCode ierr; 5614 Mat_MPIAIJ *maij; 5615 5616 PetscFunctionBegin; 5617 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5618 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5619 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5620 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5621 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5622 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5623 maij = (Mat_MPIAIJ*) (*mat)->data; 5624 5625 (*mat)->preallocated = PETSC_TRUE; 5626 5627 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5628 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5629 5630 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5631 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5632 5633 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5634 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5635 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5636 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5637 5638 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5639 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5640 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5641 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5642 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5643 PetscFunctionReturn(0); 5644 } 5645 5646 /* 5647 Special version for direct calls from Fortran 5648 */ 5649 #include <petsc/private/fortranimpl.h> 5650 5651 /* Change these macros so can be used in void function */ 5652 #undef CHKERRQ 5653 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5654 #undef SETERRQ2 5655 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5656 #undef SETERRQ3 5657 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5658 #undef SETERRQ 5659 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5660 5661 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5662 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5663 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5664 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5665 #else 5666 #endif 5667 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5668 { 5669 Mat mat = *mmat; 5670 PetscInt m = *mm, n = *mn; 5671 InsertMode addv = *maddv; 5672 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5673 PetscScalar value; 5674 PetscErrorCode ierr; 5675 5676 MatCheckPreallocated(mat,1); 5677 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5678 5679 #if defined(PETSC_USE_DEBUG) 5680 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5681 #endif 5682 { 5683 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5684 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5685 PetscBool roworiented = aij->roworiented; 5686 5687 /* Some Variables required in the macro */ 5688 Mat A = aij->A; 5689 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5690 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5691 MatScalar *aa = a->a; 5692 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5693 Mat B = aij->B; 5694 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5695 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5696 MatScalar *ba = b->a; 5697 5698 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5699 PetscInt nonew = a->nonew; 5700 MatScalar *ap1,*ap2; 5701 5702 PetscFunctionBegin; 5703 for (i=0; i<m; i++) { 5704 if (im[i] < 0) continue; 5705 #if defined(PETSC_USE_DEBUG) 5706 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5707 #endif 5708 if (im[i] >= rstart && im[i] < rend) { 5709 row = im[i] - rstart; 5710 lastcol1 = -1; 5711 rp1 = aj + ai[row]; 5712 ap1 = aa + ai[row]; 5713 rmax1 = aimax[row]; 5714 nrow1 = ailen[row]; 5715 low1 = 0; 5716 high1 = nrow1; 5717 lastcol2 = -1; 5718 rp2 = bj + bi[row]; 5719 ap2 = ba + bi[row]; 5720 rmax2 = bimax[row]; 5721 nrow2 = bilen[row]; 5722 low2 = 0; 5723 high2 = nrow2; 5724 5725 for (j=0; j<n; j++) { 5726 if (roworiented) value = v[i*n+j]; 5727 else value = v[i+j*m]; 5728 if (in[j] >= cstart && in[j] < cend) { 5729 col = in[j] - cstart; 5730 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5731 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5732 } else if (in[j] < 0) continue; 5733 #if defined(PETSC_USE_DEBUG) 5734 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5735 #endif 5736 else { 5737 if (mat->was_assembled) { 5738 if (!aij->colmap) { 5739 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5740 } 5741 #if defined(PETSC_USE_CTABLE) 5742 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5743 col--; 5744 #else 5745 col = aij->colmap[in[j]] - 1; 5746 #endif 5747 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5748 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5749 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5750 col = in[j]; 5751 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5752 B = aij->B; 5753 b = (Mat_SeqAIJ*)B->data; 5754 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5755 rp2 = bj + bi[row]; 5756 ap2 = ba + bi[row]; 5757 rmax2 = bimax[row]; 5758 nrow2 = bilen[row]; 5759 low2 = 0; 5760 high2 = nrow2; 5761 bm = aij->B->rmap->n; 5762 ba = b->a; 5763 } 5764 } else col = in[j]; 5765 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5766 } 5767 } 5768 } else if (!aij->donotstash) { 5769 if (roworiented) { 5770 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5771 } else { 5772 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5773 } 5774 } 5775 } 5776 } 5777 PetscFunctionReturnVoid(); 5778 } 5779 5780