1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 22 enough exist. 23 24 Level: beginner 25 26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 27 M*/ 28 29 /*MC 30 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 31 32 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 33 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 34 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 35 for communicators controlling multiple processes. It is recommended that you call both of 36 the above preallocation routines for simplicity. 37 38 Options Database Keys: 39 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
M*/

/*
   MatSetBlockSizes_MPIAIJ - pushes the requested row/column block sizes down to
   the local diagonal block A and the off-diagonal block B.  B always gets a
   column block size of 1 because its columns are a compressed, arbitrary
   subset of the global columns.
*/
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) { /* the sequential blocks exist only after preallocation/setup */
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatFindNonzeroRows_MPIAIJ - creates an IS (in global numbering) of the
   locally owned rows that contain at least one stored entry with a nonzero
   value, in either the diagonal (A) or off-diagonal (B) block.

   If no process has an all-zero row, *keptrows is left NULL and the routine
   returns early, so callers can cheaply detect the common case.

   Note: the allreduce makes this collective on the matrix communicator.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  /* first pass: count locally zero rows in cnt; a row is "zero" when it has no
     stored entries or every stored entry compares equal to 0.0 */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0); /* no zero rows anywhere: leave *keptrows NULL */
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  /* second pass: record the global indices of the rows being kept */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER): do not free it here */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatDiagonalSet_MPIAIJ - adds/inserts the vector D into the matrix diagonal.
   When the matrix is assembled and the row and column ownership ranges
   coincide, the entire diagonal lives in the local block A and the operation
   is purely local; otherwise fall back to the generic (communicating) path.
*/
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatFindZeroDiagonals_MPIAIJ - creates an IS (in global numbering) of the
   locally owned rows whose diagonal entry is missing or zero.  The search is
   delegated to the private sequential routine on the diagonal block A, and
   the resulting local indices are shifted by the ownership range start.
*/
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart; /* local -> global row numbers */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetColumnNorms_MPIAIJ - computes the requested norm of every global
   column.  Each process accumulates the contributions of its stored entries
   into a work array of length N (the GLOBAL column count, so this is not
   scalable in memory): A-block column indices are shifted by cmap->rstart,
   B-block indices are mapped to global numbering through garray.  A single
   allreduce (MAX for NORM_INFINITY, SUM otherwise) then combines the
   per-process partial results into norms[].
*/
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* accumulate |a|^2 per column; |a*a| == |a|^2 also holds for complex scalars */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + 
a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  /* combine per-process partial results: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* work held squared sums; take the square root after the global reduction */
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

/*
   MatFindOffBlockDiagonalEntries_MPIAIJ - creates an IS (global numbering) of
   the locally owned rows that have an entry outside the block diagonal: the
   union of (i) rows of A flagged by the sequential routine and (ii) rows with
   any stored nonzero in the off-process block B.
*/
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  /* concatenate the two local-numbering lists, then sort and drop duplicates */
  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart; /* shift to global row numbering */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) { /* only process 0 holds the global matrix, so only it checks the type */
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); /* process 0's block sizes win */
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    /* turn the gathered per-process row counts into an ownership-range prefix sum */
    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++; /* ld[i]: entries strictly left of the diagonal block */
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes; the matching recvs rely on
         MPI's ordering guarantee for same-source/same-tag messages */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens temporarily becomes the diagonal-only count */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore the full row lengths for the value-insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) { /* on process 0 gmataa/gmataj alias gmat's own arrays: do not free */
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* stash the left-of-diagonal counts so the MAT_REUSE_MATRIX path can
       split incoming value streams without recomputing them */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr  = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       the incoming stream is ordered per row as: B-entries left of the diagonal
       block (ld[i] of them), the A-entries, then the remaining B-entries */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diagonal tail of row i-1 plus left-of-diagonal head of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right-of-diagonal tail of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) { /* gmataa was advanced above; free via the saved base pointer */
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it, it is not scalable (each processor
    has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table mapping global column+1 -> local column+1
     (shifted by one so a lookup result of 0 can mean "not present") */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* non-scalable variant: dense array of length global-number-of-columns;
     entry 0 means "column not present locally", hence the i+1 stored value */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/*
   Inserts (row,col,value) into the diagonal block A from within
   MatSetValues_MPIAIJ().  Narrows the search window with a bounded binary
   search (the low1/high1/lastcol1 state persists across columns of one row),
   then scans linearly; updates in place when the entry exists, otherwise
   shifts the row tail up to make room, reallocating via
   MatSeqXAIJReallocateAIJ when the row is full.  (orow,ocol) are the original
   global indices, used only in the error message.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value; \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

/*
   Off-diagonal (B block) twin of MatSetValues_SeqAIJ_A_Private(); identical
   search/insert scheme, but the zero-value shortcut does not exempt the
   diagonal since B never stores diagonal entries.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

/*
   MatSetValuesRow_MPIAIJ - replaces all stored values of one (global) row with
   the values in v, supplied in stored-column order: the B entries left of the
   diagonal block, then the A (diagonal block) entries, then the remaining B
   entries.  Row must be locally owned; only valid for square matrices.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag; /* global -> local row index */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal 
part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatSetValues_MPIAIJ - inserts/adds the m x n logically dense block of
   values v at global rows im[] / columns in[].  Locally owned rows are
   dispatched directly into the A (diagonal) or B (off-diagonal) block via the
   insertion macros above; entries for rows owned elsewhere are queued in the
   matrix stash for communication during assembly.  Negative row/column
   indices are silently ignored.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          /* column lands in the diagonal block A */
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* off-diagonal block B; once assembled, B uses compressed (local)
             column numbering reached through colmap */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* unknown off-diagonal column: fall back to global numbering */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j]; /* not yet assembled: B still uses global column numbers */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* off-process row: stash for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   MatGetValues_MPIAIJ - retrieves an m x n block of values at global rows
   idxm[] / columns idxn[].  Only locally owned rows are supported; columns in
   the diagonal range read from A, all others are looked up through colmap and
   read from B (returning 0.0 for columns with no stored entry locally).
   Negative indices leave the corresponding v entries untouched.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray check guards against stale colmap hits after disassembly */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

/*
   MatAssemblyBegin_MPIAIJ - begins communicating the stashed off-process
   entries; a no-op when stashing is disabled or off-process entries are
   forbidden.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatAssemblyEnd_MPIAIJ - drains the stash (inserting received off-process
   entries via MatSetValues_MPIAIJ), assembles the A and B blocks, handles
   collective disassembly bookkeeping, and on final assembly sets up the
   scatter used by MatMult.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine 
if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD over the was_assembled flags: the result is false iff some process disassembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build garray, lvec and the Mvctx scatter for MatMult */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row-extraction workspace is invalid after assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatZeroEntries_MPIAIJ - zeros all stored values (the nonzero pattern is kept)
   in both the diagonal and off-diagonal blocks.
*/
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatZeroRows_MPIAIJ - zeros the N (globally numbered) rows in rows[], placing
   diag on the diagonal of each zeroed locally owned row; optionally fixes the
   right-hand side b using the given solution x so that the zeroed rows remain
   consistent (b = diag*x on those rows).
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *lrows;
  PetscInt        r, len;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    /* diagonal entries live in the local A block: delegate entirely */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: diagonal entries may fall anywhere, so insert
       them with MatSetValues (requires the pattern to be mutable) */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatZeroRowsColumns_MPIAIJ - zeros both the rows AND columns listed in
   rows[] (global numbering).  Uses a PetscSF to communicate which locally
   owned rows were requested by any process, delegates the diagonal block to
   MatZeroRowsColumns on A, and then zeroes the matching off-diagonal columns
   of B via a scattered 0/1 mask, updating b with the eliminated couplings
   when x and b are provided.
   NOTE: this definition continues beyond the visible source; the remainder is
   not shown here.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1; /* 1 marks a zeroed row/column */
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* gather the ghost values of x needed to correct b below */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj]; /* move the eliminated coupling to the rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) { 
if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   yy = A*xx: overlap the forward scatter of ghost values with the diagonal-block
   multiply, then add the off-diagonal contribution once the ghosts have arrived.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;  /* NOTE(review): MatMultAdd below switches to Mvctx_mpi1 when Mvctx_mpi1_flg is set; this path does not — confirm intended */

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Apply only the local diagonal block: xx = diag(A)_local * bb */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx, overlapping communication as in MatMult_MPIAIJ */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   yy = A^T*xx: the off-diagonal transpose contribution is computed into lvec and
   scattered back (reverse, additive); ordering of Begin/End relative to the local
   multiply depends on whether the scatter is "merged".
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Test whether Bmat equals Amat^T to within tol: first a cheap local test on the
   diagonal blocks, then (in parallel) a test on extracted off-diagonal submatrices.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy
test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global indices outside this process's ownership range [first,last) */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* A is symmetric iff A equals its own transpose (within tol) */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = yy + A^T*xx, overlapping the reverse scatter with the local transpose multiply */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* A <- aa*A: scale both the diagonal and off-diagonal blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release all storage owned by an MPIAIJ matrix and unregister composed methods */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif 1116 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1117 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1118 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1119 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1120 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1121 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1122 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1123 1124 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1125 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1133 #if defined(PETSC_HAVE_ELEMENTAL) 1134 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1135 #endif 1136 #if defined(PETSC_HAVE_HYPRE) 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1139 #endif 1140 PetscFunctionReturn(0); 1141 } 1142 1143 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1144 { 1145 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1146 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1147 Mat_SeqAIJ *B = 
(Mat_SeqAIJ*)aij->B->data; 1148 PetscErrorCode ierr; 1149 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1150 int fd; 1151 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1152 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1153 PetscScalar *column_values; 1154 PetscInt message_count,flowcontrolcount; 1155 FILE *file; 1156 1157 PetscFunctionBegin; 1158 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1159 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1160 nz = A->nz + B->nz; 1161 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1162 if (!rank) { 1163 header[0] = MAT_FILE_CLASSID; 1164 header[1] = mat->rmap->N; 1165 header[2] = mat->cmap->N; 1166 1167 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1168 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1169 /* get largest number of rows any processor has */ 1170 rlen = mat->rmap->n; 1171 range = mat->rmap->range; 1172 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1173 } else { 1174 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1175 rlen = mat->rmap->n; 1176 } 1177 1178 /* load up the local row counts */ 1179 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1180 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1181 1182 /* store the row lengths to the file */ 1183 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1184 if (!rank) { 1185 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1186 for (i=1; i<size; i++) { 1187 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1188 rlen = range[i+1] - range[i]; 1189 ierr = 
MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1190 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1191 } 1192 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1193 } else { 1194 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1195 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1196 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1197 } 1198 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1199 1200 /* load up the local column indices */ 1201 nzmax = nz; /* th processor needs space a largest processor needs */ 1202 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1203 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1204 cnt = 0; 1205 for (i=0; i<mat->rmap->n; i++) { 1206 for (j=B->i[i]; j<B->i[i+1]; j++) { 1207 if ((col = garray[B->j[j]]) > cstart) break; 1208 column_indices[cnt++] = col; 1209 } 1210 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1211 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1212 } 1213 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1214 1215 /* store the column indices to the file */ 1216 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1217 if (!rank) { 1218 MPI_Status status; 1219 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1220 for (i=1; i<size; i++) { 1221 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1222 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1223 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D 
nzmax = %D",nz,nzmax); 1224 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1225 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1226 } 1227 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1228 } else { 1229 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1230 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1231 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1232 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1233 } 1234 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1235 1236 /* load up the local column values */ 1237 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1238 cnt = 0; 1239 for (i=0; i<mat->rmap->n; i++) { 1240 for (j=B->i[i]; j<B->i[i+1]; j++) { 1241 if (garray[B->j[j]] > cstart) break; 1242 column_values[cnt++] = B->a[j]; 1243 } 1244 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1245 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1246 } 1247 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1248 1249 /* store the column values to the file */ 1250 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1251 if (!rank) { 1252 MPI_Status status; 1253 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1254 for (i=1; i<size; i++) { 1255 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1256 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1257 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1258 ierr = 
MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1259 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1260 } 1261 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1262 } else { 1263 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1264 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1265 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1266 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1267 } 1268 ierr = PetscFree(column_values);CHKERRQ(ierr); 1269 1270 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1271 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #include <petscdraw.h> 1276 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1277 { 1278 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1279 PetscErrorCode ierr; 1280 PetscMPIInt rank = aij->rank,size = aij->size; 1281 PetscBool isdraw,iascii,isbinary; 1282 PetscViewer sviewer; 1283 PetscViewerFormat format; 1284 1285 PetscFunctionBegin; 1286 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1287 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1288 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1289 if (iascii) { 1290 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 MatInfo info; 1293 PetscBool inodes; 1294 1295 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1296 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1297 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 
ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part: temporarily shift local column indices to global, then undo */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part: map compact column indices to global via garray */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct = cols;  /* ct keeps the malloc'd base of cols, which is advanced in the loop below */
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatch MatView for MPIAIJ to the handler shared by ASCII/draw/binary/socket viewers */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   SOR/relaxation for MPIAIJ: each iteration refreshes the ghost values of xx, forms
   bb1 = bb - B*x (off-process contribution folded into the rhs), then runs the
   sequential SOR kernel on the local diagonal block. Eisenstat's trick is handled
   separately.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* ~ binds tighter than &, so "~flag & SOR_ZERO_INITIAL_GUESS" tests that the
     zero-initial-guess bit is NOT set; bb1 is only needed when a rhs update occurs */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values since x starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* diagonal is cached lazily on first use */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
   Permute rows and columns of A: *B = P_row * A * P_col. The inverse permutations
   are computed with PetscSF reductions so each process learns the destination of
   its rows, columns, and ghost columns.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr =
ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count diagonal/off-diagonal nonzeros of each permuted row, then broadcast the
     counts to the processes that will own those rows (tdnnz/tonnz) */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}

/*
   Return the number of ghost (nonlocal) columns — the column count of the
   off-diagonal block B — and, optionally, their global indices (garray).
   The returned array is owned by the matrix; callers must not free it.
*/
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}
/*
   MatGetInfo_MPIAIJ - Collects matrix statistics by summing the local info of the
   diagonal (A) and off-diagonal (B) blocks, then reducing over the communicator
   with MAX or SUM according to the requested flag.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  /* accumulate B's local statistics on top of A's */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   MatSetOption_MPIAIJ - Dispatches an option to the appropriate place: most structural
   options are forwarded to both sequential blocks; symmetry-related options only to the
   diagonal block a->A (the off-diagonal block of a symmetric parallel matrix is not
   itself symmetric); a few options are stored directly on the parallel object.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      /* SPD implies (structural) symmetry */
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/*
   MatGetRow_MPIAIJ - Returns one locally owned row with globally numbered, sorted column
   indices, merging the diagonal-block and off-diagonal-block pieces of the row.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *mat =
(Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* Only request the pieces (values/indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column is left of the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatRestoreRow_MPIAIJ - Ends a MatGetRow() access; only clears the re-entrancy guard
   (the work arrays are kept for reuse and freed with the matrix).
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   MatNorm_MPIAIJ - Computes Frobenius, 1- (max column), or infinity- (max row) norm,
   combining contributions of the diagonal and off-diagonal blocks across processes.
   The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: delegate to the sequential implementation */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* accumulate |a_ij| per GLOBAL column on every process, then reduce; memory is
         O(global columns) per process, so this is not scalable in column count */
      ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIAIJ - Forms the transpose of A. For MAT_INITIAL_MATRIX (or in-place on
   a square matrix) it first computes the transpose's preallocation by counting column
   occurrences (communicated through a PetscSF for the off-diagonal part), then inserts
   each local row of A as a column of B. NOTE: Aloc->j is temporarily shifted to global
   indices and restored afterwards, so statement order here is significant.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    /* insert row i of A as column `row` of B */
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's innards with B's */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll)*mat*diag(rr); the scatter of rr into
   the ghosted vector is overlapped with the left-scaling work.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
 */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter of rr is in flight */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatSetUnfactored_MPIAIJ - Resets the factored state of the diagonal block.
*/
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatEqual_MPIAIJ - Compares the local blocks of two MPIAIJ matrices and reduces the
   result with a logical AND so every process returns the same global answer.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    /* only compare the off-diagonal blocks if the diagonal blocks already match */
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCopy_MPIAIJ - Copies A into B, using the fast block-wise copy only when both
   matrices share the same nonzero pattern and implementation.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatSetUp_MPIAIJ - Default setup: preallocate with default heuristics.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
/*
   MatAXPYGetPreallocation_MPIX_private - For each of the m rows, counts the size of the
   union of the (sorted, globally numbered via xltog/yltog) column sets of X and Y.
   Result is stored in nnz[i].
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx = xi[i+1] - xi[i];
    nzy = yi[i+1] - yi[i];
    nnz[i] = 0;
    /* merge the two sorted column lists, counting each distinct column once */
    for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N; /* Y is a sequential (off-diagonal) block, so N == n */
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y. Uses BLAS axpy directly on the value arrays
   when the patterns are identical; otherwise builds a freshly preallocated matrix and
   replaces Y's internals with it.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* patterns differ: preallocate a union matrix B, add into it, then swap into Y */
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

/*
   MatConjugate_MPIAIJ - Conjugates all entries; a no-op for real scalars.
*/
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

/*
   MatRealPart_MPIAIJ - Replaces every entry by its real part, in both blocks.
*/
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
MatRealPart(a->A);CHKERRQ(ierr); 2192 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2193 PetscFunctionReturn(0); 2194 } 2195 2196 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2197 { 2198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2203 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2208 { 2209 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2210 PetscErrorCode ierr; 2211 PetscInt i,*idxb = 0; 2212 PetscScalar *va,*vb; 2213 Vec vtmp; 2214 2215 PetscFunctionBegin; 2216 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2217 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2218 if (idx) { 2219 for (i=0; i<A->rmap->n; i++) { 2220 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2221 } 2222 } 2223 2224 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2225 if (idx) { 2226 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2227 } 2228 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2229 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2230 2231 for (i=0; i<A->rmap->n; i++) { 2232 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2233 va[i] = vb[i]; 2234 if (idx) idx[i] = a->garray[idxb[i]]; 2235 } 2236 } 2237 2238 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2239 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2240 ierr = PetscFree(idxb);CHKERRQ(ierr); 2241 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2242 PetscFunctionReturn(0); 2243 } 2244 2245 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2246 { 2247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2248 PetscErrorCode ierr; 2249 PetscInt i,*idxb = 0; 2250 PetscScalar *va,*vb; 2251 Vec vtmp; 2252 2253 PetscFunctionBegin; 2254 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2255 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2256 if (idx) { 2257 for (i=0; i<A->cmap->n; i++) { 2258 if 
(PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2259 } 2260 } 2261 2262 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2263 if (idx) { 2264 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2265 } 2266 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2267 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2268 2269 for (i=0; i<A->rmap->n; i++) { 2270 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2271 va[i] = vb[i]; 2272 if (idx) idx[i] = a->garray[idxb[i]]; 2273 } 2274 } 2275 2276 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2277 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2278 ierr = PetscFree(idxb);CHKERRQ(ierr); 2279 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2280 PetscFunctionReturn(0); 2281 } 2282 2283 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2284 { 2285 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2286 PetscInt n = A->rmap->n; 2287 PetscInt cstart = A->cmap->rstart; 2288 PetscInt *cmap = mat->garray; 2289 PetscInt *diagIdx, *offdiagIdx; 2290 Vec diagV, offdiagV; 2291 PetscScalar *a, *diagA, *offdiagA; 2292 PetscInt r; 2293 PetscErrorCode ierr; 2294 2295 PetscFunctionBegin; 2296 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2297 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2298 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2299 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2300 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2301 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2302 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2303 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2304 for (r = 0; r < n; ++r) { 2305 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2306 a[r] = diagA[r]; 2307 idx[r] = cstart + diagIdx[r]; 2308 } else { 2309 a[r] = offdiagA[r]; 2310 idx[r] = cmap[offdiagIdx[r]]; 2311 } 2312 } 2313 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2314 ierr = 
VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetRowMax_MPIAIJ - For each local row, returns the maximum entry in v and its
   global column index in idx, chosen between the diagonal and off-diagonal blocks.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    /* prefer the diagonal block on ties */
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetSeqNonzeroStructure_MPIAIJ - Gathers the global nonzero structure (no values)
   as a sequential matrix on each process.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatInvertBlockDiagonal_MPIAIJ - Delegates block-diagonal inversion to the (local)
   diagonal block and propagates any factorization-error state.
*/
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
   MatSetRandom_MPIAIJ - Fills both local blocks with random values and reassembles.
*/
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ - Swaps the increase-overlap
   implementation between the scalable and non-scalable variants.
*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode
MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  /* NOTE(review): PetscOptionsHead() is normally paired with PetscOptionsTail(), and
     PetscObjectOptionsBegin() with PetscOptionsEnd(); here Head is followed by Begin
     but only one End appears — confirm the intended pairing against PETSc's options
     API before changing */
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  ierr = PetscObjectOptionsBegin((PetscObject)A);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatShift_MPIAIJ - Computes Y = Y + a*I, first making sure the diagonal locations are
   preallocated when the matrix is empty.
*/
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij  = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* empty diagonal block: preallocate 1/row but keep the original nonew policy */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMissingDiagonal_MPIAIJ - Checks the (local) diagonal block for a missing diagonal
   entry; on output *d (if requested) is converted to global row numbering.
*/
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d += rstart; /* local row -> global row */

  }
  PetscFunctionReturn(0);
}


/* -------------------------------------------------------------------*/
/* Function table for the MPIAIJ matrix type; the slot numbers in comments match the
   indices of struct _MatOps, and 0 marks an unimplemented operation. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       MatRARt_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

/*
   MatStoreValues_MPIAIJ - Saves the current values of both blocks so they can be
   restored later with MatRetrieveValues().
*/
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatRetrieveValues_MPIAIJ - Restores the values saved by MatStoreValues().
*/
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMPIAIJSetPreallocation_MPIAIJ - Preallocates the diagonal and off-diagonal
   sequential blocks from per-row (d_nnz/o_nnz) or uniform (d_nz/o_nz) counts.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2658 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2659 b = (Mat_MPIAIJ*)B->data; 2660 2661 #if defined(PETSC_USE_CTABLE) 2662 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2663 #else 2664 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2665 #endif 2666 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2667 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2668 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2669 2670 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2671 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2672 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2673 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2674 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2675 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2676 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2677 2678 if (!B->preallocated) { 2679 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2680 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2681 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2682 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2683 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2684 } 2685 2686 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2687 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2688 B->preallocated = PETSC_TRUE; 2689 B->was_assembled = PETSC_FALSE; 2690 B->assembled = PETSC_FALSE;; 2691 PetscFunctionReturn(0); 2692 } 2693 2694 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2695 { 2696 Mat_MPIAIJ *b; 2697 PetscErrorCode ierr; 2698 2699 PetscFunctionBegin; 2700 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2701 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2702 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2703 b = (Mat_MPIAIJ*)B->data; 2704 2705 #if defined(PETSC_USE_CTABLE) 2706 ierr = 
PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   Duplicates an MPIAIJ matrix: creates a new parallel matrix of identical
   sizes/type, copies the function table and bookkeeping flags, references the
   layouts, deep-copies the column map, ghost array, local work vector and
   scatter, and duplicates both sequential parts (values copied or not
   according to cpvalues).
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the whole function table so any subtype overrides survive duplication */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance MatGetRow() scratch is not copied; it is rebuilt lazily */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr =
PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2759 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2760 #endif 2761 } else a->colmap = 0; 2762 if (oldmat->garray) { 2763 PetscInt len; 2764 len = oldmat->B->cmap->n; 2765 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2766 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2767 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2768 } else a->garray = 0; 2769 2770 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2771 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2772 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2773 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2774 2775 if (oldmat->Mvctx_mpi1) { 2776 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2777 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2778 } 2779 2780 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2781 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2782 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2783 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2784 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2785 *newmat = mat; 2786 PetscFunctionReturn(0); 2787 } 2788 2789 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2790 { 2791 PetscScalar *vals,*svals; 2792 MPI_Comm comm; 2793 PetscErrorCode ierr; 2794 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2795 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2796 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2797 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2798 PetscInt 
cend,cstart,n,*rowners; 2799 int fd; 2800 PetscInt bs = newMat->rmap->bs; 2801 2802 PetscFunctionBegin; 2803 /* force binary viewer to load .info file if it has not yet done so */ 2804 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2805 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2806 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2807 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2808 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2809 if (!rank) { 2810 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2811 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2812 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2813 } 2814 2815 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2816 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2817 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2818 if (bs < 0) bs = 1; 2819 2820 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2821 M = header[1]; N = header[2]; 2822 2823 /* If global sizes are set, check if they are consistent with that given in the file */ 2824 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2825 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2826 2827 /* determine ownership of all (block) rows */ 2828 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2829 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + 
(((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2830 else m = newMat->rmap->n; /* Set by user */ 2831 2832 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2833 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2834 2835 /* First process needs enough room for process with most rows */ 2836 if (!rank) { 2837 mmax = rowners[1]; 2838 for (i=2; i<=size; i++) { 2839 mmax = PetscMax(mmax, rowners[i]); 2840 } 2841 } else mmax = -1; /* unused, but compilers complain */ 2842 2843 rowners[0] = 0; 2844 for (i=2; i<=size; i++) { 2845 rowners[i] += rowners[i-1]; 2846 } 2847 rstart = rowners[rank]; 2848 rend = rowners[rank+1]; 2849 2850 /* distribute row lengths to all processors */ 2851 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2852 if (!rank) { 2853 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2854 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2855 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2856 for (j=0; j<m; j++) { 2857 procsnz[0] += ourlens[j]; 2858 } 2859 for (i=1; i<size; i++) { 2860 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2861 /* calculate the number of nonzeros on each processor */ 2862 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2863 procsnz[i] += rowlengths[j]; 2864 } 2865 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2866 } 2867 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2868 } else { 2869 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2870 } 2871 2872 if (!rank) { 2873 /* determine max buffer needed and allocate it */ 2874 maxnz = 0; 2875 for (i=0; i<size; i++) { 2876 maxnz = PetscMax(maxnz,procsnz[i]); 2877 } 2878 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2879 2880 /* read in my part of the matrix column indices */ 2881 nz = procsnz[0]; 2882 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2883 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2884 2885 /* read in 
every one elses and ship off */ 2886 for (i=1; i<size; i++) { 2887 nz = procsnz[i]; 2888 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2889 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2890 } 2891 ierr = PetscFree(cols);CHKERRQ(ierr); 2892 } else { 2893 /* determine buffer space needed for message */ 2894 nz = 0; 2895 for (i=0; i<m; i++) { 2896 nz += ourlens[i]; 2897 } 2898 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2899 2900 /* receive message of column indices*/ 2901 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2902 } 2903 2904 /* determine column ownership if matrix is not square */ 2905 if (N != M) { 2906 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2907 else n = newMat->cmap->n; 2908 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2909 cstart = cend - n; 2910 } else { 2911 cstart = rstart; 2912 cend = rend; 2913 n = cend - cstart; 2914 } 2915 2916 /* loop over local rows, determining number of off diagonal entries */ 2917 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2918 jj = 0; 2919 for (i=0; i<m; i++) { 2920 for (j=0; j<ourlens[i]; j++) { 2921 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2922 jj++; 2923 } 2924 } 2925 2926 for (i=0; i<m; i++) { 2927 ourlens[i] -= offlens[i]; 2928 } 2929 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2930 2931 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2932 2933 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2934 2935 for (i=0; i<m; i++) { 2936 ourlens[i] += offlens[i]; 2937 } 2938 2939 if (!rank) { 2940 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2941 2942 /* read in my part of the matrix numerical values */ 2943 nz = procsnz[0]; 2944 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2945 2946 /* insert into matrix */ 2947 jj = rstart; 2948 smycols = mycols; 2949 svals = vals; 2950 for (i=0; i<m; i++) { 2951 ierr = 
MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2952 smycols += ourlens[i]; 2953 svals += ourlens[i]; 2954 jj++; 2955 } 2956 2957 /* read in other processors and ship out */ 2958 for (i=1; i<size; i++) { 2959 nz = procsnz[i]; 2960 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2961 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2962 } 2963 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2964 } else { 2965 /* receive numeric values */ 2966 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2967 2968 /* receive message of values*/ 2969 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2970 2971 /* insert into matrix */ 2972 jj = rstart; 2973 smycols = mycols; 2974 svals = vals; 2975 for (i=0; i<m; i++) { 2976 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2977 smycols += ourlens[i]; 2978 svals += ourlens[i]; 2979 jj++; 2980 } 2981 } 2982 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2983 ierr = PetscFree(vals);CHKERRQ(ierr); 2984 ierr = PetscFree(mycols);CHKERRQ(ierr); 2985 ierr = PetscFree(rowners);CHKERRQ(ierr); 2986 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2987 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2988 PetscFunctionReturn(0); 2989 } 2990 2991 /* Not scalable because of ISAllGather() unless getting all columns. 
*/
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    /* a stride that exactly covers this rank's owned column range */
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* MPI_MIN: only if EVERY rank saw a full covering stride is the gather skippable */
  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    /* ISAllGather() does not carry the block size over; restore it */
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential
row and column index sets for retrieving mat->A 3041 iscol_o - sequential column index set for retrieving mat->B 3042 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3043 */ 3044 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3045 { 3046 PetscErrorCode ierr; 3047 Vec x,cmap; 3048 const PetscInt *is_idx; 3049 PetscScalar *xarray,*cmaparray; 3050 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3051 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3052 Mat B=a->B; 3053 Vec lvec=a->lvec,lcmap; 3054 PetscInt i,cstart,cend,Bn=B->cmap->N; 3055 MPI_Comm comm; 3056 VecScatter Mvctx=a->Mvctx; 3057 3058 PetscFunctionBegin; 3059 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3060 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3061 3062 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3063 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3064 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3065 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3066 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3067 3068 /* Get start indices */ 3069 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3070 isstart -= ncols; 3071 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3072 3073 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3074 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3075 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3076 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3077 for (i=0; i<ncols; i++) { 3078 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3079 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3080 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3081 } 3082 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3083 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3084 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3085 3086 /* Get 
iscol_d */ 3087 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3088 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3089 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3090 3091 /* Get isrow_d */ 3092 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3093 rstart = mat->rmap->rstart; 3094 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3095 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3096 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3097 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3098 3099 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3100 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3101 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3102 3103 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3104 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3105 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3106 3107 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3108 3109 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3110 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3111 3112 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3113 /* off-process column indices */ 3114 count = 0; 3115 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3116 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3117 3118 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3119 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3120 for (i=0; i<Bn; i++) { 3121 if (PetscRealPart(xarray[i]) > -1.0) { 3122 idx[count] = i; /* local column index in off-diagonal part B */ 3123 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3124 count++; 3125 } 3126 } 3127 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3128 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3129 
3130 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3131 /* cannot ensure iscol_o has same blocksize as iscol! */ 3132 3133 ierr = PetscFree(idx);CHKERRQ(ierr); 3134 *garray = cmap1; 3135 3136 ierr = VecDestroy(&x);CHKERRQ(ierr); 3137 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3138 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3139 PetscFunctionReturn(0); 3140 } 3141 3142 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3143 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3144 { 3145 PetscErrorCode ierr; 3146 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3147 Mat M = NULL; 3148 MPI_Comm comm; 3149 IS iscol_d,isrow_d,iscol_o; 3150 Mat Asub = NULL,Bsub = NULL; 3151 PetscInt n; 3152 3153 PetscFunctionBegin; 3154 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3155 3156 if (call == MAT_REUSE_MATRIX) { 3157 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3158 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3159 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3160 3161 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3162 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3163 3164 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3165 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3166 3167 /* Update diagonal and off-diagonal portions of submat */ 3168 asub = (Mat_MPIAIJ*)(*submat)->data; 3169 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3170 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 
3171 if (n) { 3172 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3173 } 3174 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3175 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3176 3177 } else { /* call == MAT_INITIAL_MATRIX) */ 3178 const PetscInt *garray; 3179 PetscInt BsubN; 3180 3181 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3182 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3183 3184 /* Create local submatrices Asub and Bsub */ 3185 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3186 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3187 3188 /* Create submatrix M */ 3189 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3190 3191 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3192 asub = (Mat_MPIAIJ*)M->data; 3193 3194 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3195 n = asub->B->cmap->N; 3196 if (BsubN > n) { 3197 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3198 const PetscInt *idx; 3199 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3200 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3201 3202 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3203 j = 0; 3204 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3205 for (i=0; i<n; i++) { 3206 if (j >= BsubN) break; 3207 while (subgarray[i] > garray[j]) j++; 3208 3209 if (subgarray[i] == garray[j]) { 3210 idx_new[i] = idx[j++]; 3211 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3212 } 3213 ierr = 
ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3214 3215 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3217 3218 } else if (BsubN < n) { 3219 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3220 } 3221 3222 ierr = PetscFree(garray);CHKERRQ(ierr); 3223 *submat = M; 3224 3225 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3226 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3227 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3228 3229 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3230 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3231 3232 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3233 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3234 } 3235 PetscFunctionReturn(0); 3236 } 3237 3238 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3239 { 3240 PetscErrorCode ierr; 3241 IS iscol_local=NULL,isrow_d; 3242 PetscInt csize; 3243 PetscInt n,i,j,start,end; 3244 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3245 MPI_Comm comm; 3246 3247 PetscFunctionBegin; 3248 /* If isrow has same processor distribution as mat, 3249 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3250 if (call == MAT_REUSE_MATRIX) { 3251 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3252 if (isrow_d) { 3253 sameRowDist = PETSC_TRUE; 3254 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3255 } else { 3256 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3257 if (iscol_local) { 3258 sameRowDist = PETSC_TRUE; 3259 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3260 } 3261 } 3262 } else { 3263 /* Check if 
isrow has same processor distribution as mat */ 3264 sameDist[0] = PETSC_FALSE; 3265 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3266 if (!n) { 3267 sameDist[0] = PETSC_TRUE; 3268 } else { 3269 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3270 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3271 if (i >= start && j < end) { 3272 sameDist[0] = PETSC_TRUE; 3273 } 3274 } 3275 3276 /* Check if iscol has same processor distribution as mat */ 3277 sameDist[1] = PETSC_FALSE; 3278 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3279 if (!n) { 3280 sameDist[1] = PETSC_TRUE; 3281 } else { 3282 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3283 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3284 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3285 } 3286 3287 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3288 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3289 sameRowDist = tsameDist[0]; 3290 } 3291 3292 if (sameRowDist) { 3293 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3294 /* isrow and iscol have same processor distribution as mat */ 3295 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3296 PetscFunctionReturn(0); 3297 } else { /* sameRowDist */ 3298 /* isrow has same processor distribution as mat */ 3299 if (call == MAT_INITIAL_MATRIX) { 3300 PetscBool sorted; 3301 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3302 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3303 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3304 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3305 3306 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3307 if (sorted) { 3308 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3309 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3310 PetscFunctionReturn(0); 3311 } 3312 } else { /* call == MAT_REUSE_MATRIX */ 3313 IS iscol_sub; 3314 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3315 if (iscol_sub) { 3316 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3317 PetscFunctionReturn(0); 3318 } 3319 } 3320 } 3321 } 3322 3323 /* General case: iscol -> iscol_local which has global size of iscol */ 3324 if (call == MAT_REUSE_MATRIX) { 3325 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3326 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3327 } else { 3328 if (!iscol_local) { 3329 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3330 } 3331 } 3332 3333 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3334 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3335 3336 if (call == MAT_INITIAL_MATRIX) { 3337 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3338 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3339 } 3340 PetscFunctionReturn(0); 3341 } 3342 3343 /*@C 3344 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3345 and "off-diagonal" part of the matrix in CSR format. 3346 3347 Collective on MPI_Comm 3348 3349 Input Parameters: 3350 + comm - MPI communicator 3351 . A - "diagonal" portion of matrix 3352 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3353 - garray - global index of B columns 3354 3355 Output Parameter: 3356 . 
 mat - the matrix, with input A as its local diagonal matrix
   Level: advanced

   Notes:
       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
       A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.

.seealso: MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;
  Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
  PetscScalar    *oa=b->a;
  Mat            Bnew;
  PetscInt       m,n,N;

  PetscFunctionBegin;
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
  if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: local diagonal-block column counts summed over all ranks */
  ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);

  ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* Translate B's compact (local) column indices to global indices via garray */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  ierr  = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
  bnew  = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);

  /* transfer ownership of the CSR arrays from B to Bnew before destroying B */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  ierr = MatDestroy(&B);CHKERRQ(ierr);

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);

/*
   Extracts the parallel submatrix mat[isrow,iscol] when isrow has the same row
   distribution as mat (caller guarantees this).  iscol_local is the sequential,
   gathered form of iscol; it must be sorted but may contain duplicate indices.
   NOTE(review): iscol_local may be NULL on MAT_REUSE_MATRIX — the composed
   "SubIScol"/"Subcmap"/"SubMatrix" objects from the initial call are used instead.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the objects composed on *newmat by the MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap.
         Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: march k along sorted garray (relies on is_idx sorted) */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* one allocation holds both dlens and olens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; /* map Msub columns back to submat columns */
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
    in local and then by concatenating the local matrices the end result.
3645 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3646 3647 Note: This requires a sequential iscol with all indices. 3648 */ 3649 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3650 { 3651 PetscErrorCode ierr; 3652 PetscMPIInt rank,size; 3653 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3654 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3655 Mat M,Mreuse; 3656 MatScalar *aa,*vwork; 3657 MPI_Comm comm; 3658 Mat_SeqAIJ *aij; 3659 PetscBool colflag,allcolumns=PETSC_FALSE; 3660 3661 PetscFunctionBegin; 3662 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3663 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3664 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3665 3666 /* Check for special case: each processor gets entire matrix columns */ 3667 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3668 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3669 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3670 3671 if (call == MAT_REUSE_MATRIX) { 3672 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3673 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3674 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3675 } else { 3676 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3677 } 3678 3679 /* 3680 m - number of local rows 3681 n - number of columns (same on all processors) 3682 rstart - first row in new global matrix generated 3683 */ 3684 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3685 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3686 if (call == MAT_INITIAL_MATRIX) { 3687 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3688 ii = aij->i; 3689 jj = aij->j; 3690 3691 /* 3692 
Determine the number of non-zeros in the diagonal and off-diagonal 3693 portions of the matrix in order to do correct preallocation 3694 */ 3695 3696 /* first get start and end of "diagonal" columns */ 3697 if (csize == PETSC_DECIDE) { 3698 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3699 if (mglobal == n) { /* square matrix */ 3700 nlocal = m; 3701 } else { 3702 nlocal = n/size + ((n % size) > rank); 3703 } 3704 } else { 3705 nlocal = csize; 3706 } 3707 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3708 rstart = rend - nlocal; 3709 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3710 3711 /* next, compute all the lengths */ 3712 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3713 olens = dlens + m; 3714 for (i=0; i<m; i++) { 3715 jend = ii[i+1] - ii[i]; 3716 olen = 0; 3717 dlen = 0; 3718 for (j=0; j<jend; j++) { 3719 if (*jj < rstart || *jj >= rend) olen++; 3720 else dlen++; 3721 jj++; 3722 } 3723 olens[i] = olen; 3724 dlens[i] = dlen; 3725 } 3726 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3727 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3728 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3729 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3730 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3731 ierr = PetscFree(dlens);CHKERRQ(ierr); 3732 } else { 3733 PetscInt ml,nl; 3734 3735 M = *newmat; 3736 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3737 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3738 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3739 /* 3740 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3741 rather than the slower MatSetValues(). 
3742 */ 3743 M->was_assembled = PETSC_TRUE; 3744 M->assembled = PETSC_FALSE; 3745 } 3746 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3747 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3748 ii = aij->i; 3749 jj = aij->j; 3750 aa = aij->a; 3751 for (i=0; i<m; i++) { 3752 row = rstart + i; 3753 nz = ii[i+1] - ii[i]; 3754 cwork = jj; jj += nz; 3755 vwork = aa; aa += nz; 3756 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3757 } 3758 3759 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3760 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3761 *newmat = M; 3762 3763 /* save submatrix used in processor for next request */ 3764 if (call == MAT_INITIAL_MATRIX) { 3765 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3766 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3767 } 3768 PetscFunctionReturn(0); 3769 } 3770 3771 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3772 { 3773 PetscInt m,cstart, cend,j,nnz,i,d; 3774 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3775 const PetscInt *JJ; 3776 PetscScalar *values; 3777 PetscErrorCode ierr; 3778 PetscBool nooffprocentries; 3779 3780 PetscFunctionBegin; 3781 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3782 3783 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3784 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3785 m = B->rmap->n; 3786 cstart = B->cmap->rstart; 3787 cend = B->cmap->rend; 3788 rstart = B->rmap->rstart; 3789 3790 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3791 3792 #if defined(PETSC_USE_DEBUGGING) 3793 for (i=0; i<m; i++) { 3794 nnz = Ii[i+1]- Ii[i]; 3795 JJ = J + Ii[i]; 3796 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3797 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative 
column index",i,j); 3798 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3799 } 3800 #endif 3801 3802 for (i=0; i<m; i++) { 3803 nnz = Ii[i+1]- Ii[i]; 3804 JJ = J + Ii[i]; 3805 nnz_max = PetscMax(nnz_max,nnz); 3806 d = 0; 3807 for (j=0; j<nnz; j++) { 3808 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3809 } 3810 d_nnz[i] = d; 3811 o_nnz[i] = nnz - d; 3812 } 3813 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3814 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3815 3816 if (v) values = (PetscScalar*)v; 3817 else { 3818 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3819 } 3820 3821 for (i=0; i<m; i++) { 3822 ii = i + rstart; 3823 nnz = Ii[i+1]- Ii[i]; 3824 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3825 } 3826 nooffprocentries = B->nooffprocentries; 3827 B->nooffprocentries = PETSC_TRUE; 3828 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3829 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3830 B->nooffprocentries = nooffprocentries; 3831 3832 if (!v) { 3833 ierr = PetscFree(values);CHKERRQ(ierr); 3834 } 3835 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3836 PetscFunctionReturn(0); 3837 } 3838 3839 /*@ 3840 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3841 (the default parallel PETSc format). 3842 3843 Collective on MPI_Comm 3844 3845 Input Parameters: 3846 + B - the matrix 3847 . i - the indices into j for the start of each local row (starts with zero) 3848 . 
 j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to MatMPIAIJSetPreallocationCSR_MPIAIJ when B is MPIAIJ; no-op for other types */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).
For good matrix assembly performance 3896 the user should preallocate the matrix storage by setting the parameters 3897 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3898 performance can be increased by more than a factor of 50. 3899 3900 Collective on MPI_Comm 3901 3902 Input Parameters: 3903 + B - the matrix 3904 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3905 (same value is used for all local rows) 3906 . d_nnz - array containing the number of nonzeros in the various rows of the 3907 DIAGONAL portion of the local submatrix (possibly different for each row) 3908 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3909 The size of this array is equal to the number of local rows, i.e 'm'. 3910 For matrices that will be factored, you must leave room for (and set) 3911 the diagonal entry even if it is zero. 3912 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3913 submatrix (same value is used for all local rows). 3914 - o_nnz - array containing the number of nonzeros in the various rows of the 3915 OFF-DIAGONAL portion of the local submatrix (possibly different for 3916 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3917 structure. The size of this array is equal to the number 3918 of local rows, i.e 'm'. 3919 3920 If the *_nnz parameter is given then the *_nz parameter is ignored 3921 3922 The AIJ format (also called the Yale sparse matrix format or 3923 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3924 storage. The stored row and column indices begin with zero. 3925 See Users-Manual: ch_mat for details. 3926 3927 The parallel matrix is partitioned such that the first m0 rows belong to 3928 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3929 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
3930 3931 The DIAGONAL portion of the local submatrix of a processor can be defined 3932 as the submatrix which is obtained by extraction the part corresponding to 3933 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3934 first row that belongs to the processor, r2 is the last row belonging to 3935 the this processor, and c1-c2 is range of indices of the local part of a 3936 vector suitable for applying the matrix to. This is an mxn matrix. In the 3937 common case of a square matrix, the row and column ranges are the same and 3938 the DIAGONAL part is also square. The remaining portion of the local 3939 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3940 3941 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3942 3943 You can call MatGetInfo() to get information on how effective the preallocation was; 3944 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3945 You can also run with the option -info and look for messages with the string 3946 malloc in them to see if additional memory allocation was needed. 3947 3948 Example usage: 3949 3950 Consider the following 8x8 matrix with 34 non-zero values, that is 3951 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3952 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3953 as follows: 3954 3955 .vb 3956 1 2 0 | 0 3 0 | 0 4 3957 Proc0 0 5 6 | 7 0 0 | 8 0 3958 9 0 10 | 11 0 0 | 12 0 3959 ------------------------------------- 3960 13 0 14 | 15 16 17 | 0 0 3961 Proc1 0 18 0 | 19 20 21 | 0 0 3962 0 0 0 | 22 23 0 | 24 0 3963 ------------------------------------- 3964 Proc2 25 26 27 | 0 0 28 | 29 0 3965 30 0 0 | 31 32 33 | 0 34 3966 .ve 3967 3968 This can be represented as a collection of submatrices as: 3969 3970 .vb 3971 A B C 3972 D E F 3973 G H I 3974 .ve 3975 3976 Where the submatrices A,B,C are owned by proc0, D,E,F are 3977 owned by proc1, G,H,I are owned by proc2. 

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatches to the MPIAIJ implementation when B is MPIAIJ; no-op for other types */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* preallocation also copies the CSR arrays and assembles the matrix */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.
4110 4111 Collective on MPI_Comm 4112 4113 Input Parameters: 4114 + comm - MPI communicator 4115 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4116 This value should be the same as the local size used in creating the 4117 y vector for the matrix-vector product y = Ax. 4118 . n - This value should be the same as the local size used in creating the 4119 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4120 calculated if N is given) For square matrices n is almost always m. 4121 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4122 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4123 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4124 (same value is used for all local rows) 4125 . d_nnz - array containing the number of nonzeros in the various rows of the 4126 DIAGONAL portion of the local submatrix (possibly different for each row) 4127 or NULL, if d_nz is used to specify the nonzero structure. 4128 The size of this array is equal to the number of local rows, i.e 'm'. 4129 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4130 submatrix (same value is used for all local rows). 4131 - o_nnz - array containing the number of nonzeros in the various rows of the 4132 OFF-DIAGONAL portion of the local submatrix (possibly different for 4133 each row) or NULL, if o_nz is used to specify the nonzero 4134 structure. The size of this array is equal to the number 4135 of local rows, i.e 'm'. 4136 4137 Output Parameter: 4138 . A - the matrix 4139 4140 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4141 MatXXXXSetPreallocation() paradgm instead of this routine directly. 
4142 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4143 4144 Notes: 4145 If the *_nnz parameter is given then the *_nz parameter is ignored 4146 4147 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4148 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4149 storage requirements for this matrix. 4150 4151 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4152 processor than it must be used on all processors that share the object for 4153 that argument. 4154 4155 The user MUST specify either the local or global matrix dimensions 4156 (possibly both). 4157 4158 The parallel matrix is partitioned across processors such that the 4159 first m0 rows belong to process 0, the next m1 rows belong to 4160 process 1, the next m2 rows belong to process 2 etc.. where 4161 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4162 values corresponding to [m x N] submatrix. 4163 4164 The columns are logically partitioned with the n0 columns belonging 4165 to 0th partition, the next n1 columns belonging to the next 4166 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4167 4168 The DIAGONAL portion of the local submatrix on any given processor 4169 is the submatrix corresponding to the rows and columns m,n 4170 corresponding to the given processor. i.e diagonal matrix on 4171 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4172 etc. The remaining portion of the local submatrix [m x (N-n)] 4173 constitute the OFF-DIAGONAL portion. The example below better 4174 illustrates this concept. 4175 4176 For a square global matrix we define each processor's diagonal portion 4177 to be its local rows and the corresponding columns (a square submatrix); 4178 each processor's off-diagonal portion encompasses the remainder of the 4179 local matrix (a rectangular submatrix). 4180 4181 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
4182 4183 When calling this routine with a single process communicator, a matrix of 4184 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4185 type of communicator, use the construction mechanism 4186 .vb 4187 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4188 .ve 4189 4190 $ MatCreate(...,&A); 4191 $ MatSetType(A,MATMPIAIJ); 4192 $ MatSetSizes(A, m,n,M,N); 4193 $ MatMPIAIJSetPreallocation(A,...); 4194 4195 By default, this format uses inodes (identical nodes) when possible. 4196 We search for consecutive rows with the same nonzero structure, thereby 4197 reusing matrix information to achieve increased efficiency. 4198 4199 Options Database Keys: 4200 + -mat_no_inode - Do not use inodes 4201 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4202 4203 4204 4205 Example usage: 4206 4207 Consider the following 8x8 matrix with 34 non-zero values, that is 4208 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4209 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4210 as follows 4211 4212 .vb 4213 1 2 0 | 0 3 0 | 0 4 4214 Proc0 0 5 6 | 7 0 0 | 8 0 4215 9 0 10 | 11 0 0 | 12 0 4216 ------------------------------------- 4217 13 0 14 | 15 16 17 | 0 0 4218 Proc1 0 18 0 | 19 20 21 | 0 0 4219 0 0 0 | 22 23 0 | 24 0 4220 ------------------------------------- 4221 Proc2 25 26 27 | 0 0 28 | 29 0 4222 30 0 0 | 31 32 33 | 0 34 4223 .ve 4224 4225 This can be represented as a collection of submatrices as 4226 4227 .vb 4228 A B C 4229 D E F 4230 G H I 4231 .ve 4232 4233 Where the submatrices A,B,C are owned by proc0, D,E,F are 4234 owned by proc1, G,H,I are owned by proc2. 4235 4236 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4237 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4238 The 'M','N' parameters are 8,8, and have the same values on all procs. 

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4273 4274 Level: intermediate 4275 4276 .keywords: matrix, aij, compressed row, sparse, parallel 4277 4278 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4279 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4280 @*/ 4281 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4282 { 4283 PetscErrorCode ierr; 4284 PetscMPIInt size; 4285 4286 PetscFunctionBegin; 4287 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4288 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4289 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4290 if (size > 1) { 4291 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4292 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4293 } else { 4294 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4295 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4296 } 4297 PetscFunctionReturn(0); 4298 } 4299 4300 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4301 { 4302 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4303 PetscBool flg; 4304 PetscErrorCode ierr; 4305 4306 PetscFunctionBegin; 4307 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4308 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4309 if (Ad) *Ad = a->A; 4310 if (Ao) *Ao = a->B; 4311 if (colmap) *colmap = a->garray; 4312 PetscFunctionReturn(0); 4313 } 4314 4315 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4316 { 4317 PetscErrorCode ierr; 4318 PetscInt m,N,i,rstart,nnz,Ii; 4319 PetscInt *indx; 4320 PetscScalar *values; 4321 4322 PetscFunctionBegin; 4323 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4324 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4325 PetscInt *dnz,*onz,sum,bs,cbs; 4326 4327 if (n 
== PETSC_DECIDE) { 4328 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4329 } 4330 /* Check sum(n) = N */ 4331 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4332 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4333 4334 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4335 rstart -= m; 4336 4337 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4338 for (i=0; i<m; i++) { 4339 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4340 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4341 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4342 } 4343 4344 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4345 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4346 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4347 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4348 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4349 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4350 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4351 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4352 } 4353 4354 /* numeric phase */ 4355 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4356 for (i=0; i<m; i++) { 4357 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4358 Ii = i + rstart; 4359 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4360 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4361 } 4362 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4363 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4364 PetscFunctionReturn(0); 4365 } 4366 4367 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4368 { 4369 PetscErrorCode ierr; 4370 PetscMPIInt rank; 4371 PetscInt m,N,i,rstart,nnz; 4372 size_t len; 4373 const PetscInt 
*indx; 4374 PetscViewer out; 4375 char *name; 4376 Mat B; 4377 const PetscScalar *values; 4378 4379 PetscFunctionBegin; 4380 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4381 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4382 /* Should this be the type of the diagonal block of A? */ 4383 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4384 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4385 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4386 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4387 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4388 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4389 for (i=0; i<m; i++) { 4390 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4391 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4392 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4393 } 4394 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4395 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4396 4397 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4398 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4399 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4400 sprintf(name,"%s.%d",outfile,rank); 4401 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4402 ierr = PetscFree(name);CHKERRQ(ierr); 4403 ierr = MatView(B,out);CHKERRQ(ierr); 4404 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4405 ierr = MatDestroy(&B);CHKERRQ(ierr); 4406 PetscFunctionReturn(0); 4407 } 4408 4409 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4410 { 4411 PetscErrorCode ierr; 4412 Mat_Merge_SeqsToMPI *merge; 4413 PetscContainer container; 4414 4415 PetscFunctionBegin; 4416 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4417 if (container) { 4418 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4419 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4420 ierr = 
PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  /* delegate to the regular MPIAIJ destructor for the matrix itself */
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills mpimat with the sum of the per-process
   sequential matrices, reusing the communication pattern and symbolic (i,j) structure stored
   in the "MatMergeSeqsToMPI" container attached by MatCreateMPIAIJSumSeqAIJSymbolic(). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the symbolic structure and communication metadata computed in the symbolic phase;
     NOTE(review): assumes the container exists, i.e. mpimat came from the symbolic routine */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* values for all rows owned by [proc] are contiguous in aa, starting at ai[owners[proc]] */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);  /* scratch row of accumulated values, one slot per possible column */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* merge the (sorted) local columns into the (sorted) union row: advance j over bj_i, matching acols */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines the nonzero structure of the sum,
   sets up the send/recv pattern, and attaches the Mat_Merge_SeqsToMPI support structure to the result. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request
*si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      /* rows this process owns are kept locally, nothing to send */
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only nonempty rows: the i-structure message carries (row index, offset) per nonempty row */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* column indices of all rows owned by [proc] are contiguous in aj */
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
                 matrices from each processor
Collective on MPI_Comm 4815 4816 Input Parameters: 4817 + comm - the communicators the parallel matrix will live on 4818 . seqmat - the input sequential matrices 4819 . m - number of local rows (or PETSC_DECIDE) 4820 . n - number of local columns (or PETSC_DECIDE) 4821 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4822 4823 Output Parameter: 4824 . mpimat - the parallel matrix generated 4825 4826 Level: advanced 4827 4828 Notes: 4829 The dimensions of the sequential matrix in each processor MUST be the same. 4830 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4831 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4832 @*/ 4833 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4834 { 4835 PetscErrorCode ierr; 4836 PetscMPIInt size; 4837 4838 PetscFunctionBegin; 4839 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4840 if (size == 1) { 4841 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4842 if (scall == MAT_INITIAL_MATRIX) { 4843 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4844 } else { 4845 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4846 } 4847 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4848 PetscFunctionReturn(0); 4849 } 4850 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4851 if (scall == MAT_INITIAL_MATRIX) { 4852 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4853 } 4854 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4855 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4856 PetscFunctionReturn(0); 4857 } 4858 4859 /*@ 4860 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4861 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4862 with MatGetSize() 4863 4864 Not Collective 4865 4866 Input Parameters: 4867 + A - the matrix 4868 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4869 4870 Output Parameter: 4871 . A_loc - the local sequential matrix generated 4872 4873 Level: developer 4874 4875 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4876 4877 @*/ 4878 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4879 { 4880 PetscErrorCode ierr; 4881 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4882 Mat_SeqAIJ *mat,*a,*b; 4883 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4884 MatScalar *aa,*ba,*cam; 4885 PetscScalar *ca; 4886 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4887 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4888 PetscBool match; 4889 MPI_Comm comm; 4890 PetscMPIInt size; 4891 4892 PetscFunctionBegin; 4893 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4894 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4895 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4896 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4897 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4898 4899 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4900 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4901 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4902 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4903 aa = a->a; ba = b->a; 4904 if (scall == MAT_INITIAL_MATRIX) { 4905 if (size == 1) { 4906 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4907 PetscFunctionReturn(0); 4908 } 4909 4910 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4911 ci[0] = 0; 4912 for (i=0; i<am; i++) { 4913 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4914 } 4915 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4916 ierr = 
PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4917 k = 0; 4918 for (i=0; i<am; i++) { 4919 ncols_o = bi[i+1] - bi[i]; 4920 ncols_d = ai[i+1] - ai[i]; 4921 /* off-diagonal portion of A */ 4922 for (jo=0; jo<ncols_o; jo++) { 4923 col = cmap[*bj]; 4924 if (col >= cstart) break; 4925 cj[k] = col; bj++; 4926 ca[k++] = *ba++; 4927 } 4928 /* diagonal portion of A */ 4929 for (j=0; j<ncols_d; j++) { 4930 cj[k] = cstart + *aj++; 4931 ca[k++] = *aa++; 4932 } 4933 /* off-diagonal portion of A */ 4934 for (j=jo; j<ncols_o; j++) { 4935 cj[k] = cmap[*bj++]; 4936 ca[k++] = *ba++; 4937 } 4938 } 4939 /* put together the new matrix */ 4940 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4941 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4942 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4943 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4944 mat->free_a = PETSC_TRUE; 4945 mat->free_ij = PETSC_TRUE; 4946 mat->nonew = 0; 4947 } else if (scall == MAT_REUSE_MATRIX) { 4948 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4949 ci = mat->i; cj = mat->j; cam = mat->a; 4950 for (i=0; i<am; i++) { 4951 /* off-diagonal portion of A */ 4952 ncols_o = bi[i+1] - bi[i]; 4953 for (jo=0; jo<ncols_o; jo++) { 4954 col = cmap[*bj]; 4955 if (col >= cstart) break; 4956 *cam++ = *ba++; bj++; 4957 } 4958 /* diagonal portion of A */ 4959 ncols_d = ai[i+1] - ai[i]; 4960 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4961 /* off-diagonal portion of A */ 4962 for (j=jo; j<ncols_o; j++) { 4963 *cam++ = *ba++; bj++; 4964 } 4965 } 4966 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4967 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4968 PetscFunctionReturn(0); 4969 } 4970 4971 /*@C 4972 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4973 4974 Not Collective 4975 4976 Input Parameters: 4977 + A - 
the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  /* Default row IS: all locally owned rows */
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  /* Default column IS: the nonzero columns of the local part of A, in global
     ascending order: off-diagonal columns below cstart (garray is sorted), then
     the diagonal-block columns, then the remaining off-diagonal columns */
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first off-diagonal column >= cstart */
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  /* For reuse, MatCreateSubMatrices() expects an array holding the previously created matrix */
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  /* Destroy only the index sets created here, not caller-supplied ones */
  if (!row) {
    ierr =
ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* Rows of B must align with columns of A for the product to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the sorted list of global nonzero columns of local A (same scheme as
       MatMPIAIJGetLocalMatCondensed): lower off-diagonal, diagonal, upper off-diagonal */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr =
ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); /* all columns of B */
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* Hand the index sets back to the caller when slots were provided; otherwise destroy them */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscInt               *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj,size;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* Sequential case: there is no off-diagonal part, nothing to communicate.
     NOTE(review): these assignments set the LOCAL parameter variables, not the caller's
     pointers (*startsj_s, *bufa_ptr) — they have no effect outside this function; verify intent. */
  if (size == 1) {
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  /* Reuse the matrix-vector scatter context to learn the communication pattern */
  ctx = a->Mvctx;
  if (a->Mvctx->mpi3 && !a->Mvctx_mpi1) {
    /* a->Mvctx is type of MPI3 which is not implemented for Mat-Mat ops,
       thus create a->Mvctx_mpi1 */
    a->Mvctx_mpi1_flg = PETSC_TRUE;
    ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
    ctx  = a->Mvctx_mpi1;
  }
  tag =
((PetscObject)ctx)->tag;

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices; /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  /* Without caller-provided save slots we cannot reuse, so force a full (re)build */
  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array: exchange row lengths so each process can size its receive buffers */
    /*---------*/
    /* post receives */
    ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
    for (i=0; i<nsends; i++) {
      rowlen = svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting
point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); /* overflow-checked accumulation */
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array: exchange the column indices of the requested rows */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr =
MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure unchanged: recover the saved offsets/buffers and only refresh the values */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  /* NOTE(review): error message looks copy-pasted from elsewhere ("posses" typo, mentions
     a matrix P that is not a parameter here); a message about an invalid MatReuse value
     would fit better — confirm before changing */
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");

  /* a-array: exchange the numerical values (done for both INITIAL and REUSE) */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      /* NOTE(review): bufa_ptr is NULL in this branch, so PetscFree(bufa_ptr) is a no-op
         and the local buffer 'bufa' appears to leak; PetscFree(bufa) looks intended — verify */
      ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameters:
. A - The matrix in mpiaij format

  Output Parameter:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
.
colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  /* NOTE(review): the PetscValidPointer checks above already reject NULL arguments,
     so these guards are redundant (harmless) — confirm before simplifying */
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

/* Conversion routines implemented in other translation units */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);

/*
    Computes (B'*A')' since computing B*A directly is untenable

       n          p           p
     (    )     (    )      (    )
   m (  A ) * n (  B )  = m (  C )
     (    )     (    )      (    )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  /* Form C = (B' * A')' via explicit transposes; Ct is transposed back into the caller's C */
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase: creates the MPIDENSE product matrix C = A*B (values filled by the numeric phase) */
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  /* NOTE(review): %d with PetscInt arguments — PETSc convention is %D for PetscInt; verify */
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
/* Driver combining the symbolic (on MAT_INITIAL_MATRIX) and numeric phases */
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr =
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/

/* Constructor registered for MATMPIAIJ: sets up the data structure, the stash for
   off-process entries, and composes the type-specific function table entries */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  /* Compose type-specific implementations looked up by name elsewhere in PETSc */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* Wrap the caller's arrays directly (no copy) as the diagonal and off-diagonal blocks */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Only local entries exist by construction, so suppress the off-process reduction
     during assembly, then restore the option for subsequent user assemblies */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* (the Fortran entry point below returns void, so error checks must abort, not return) */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt
im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  /* Fortran-callable fast path of MatSetValues for MATMPIAIJ: inlines the
     SeqAIJ insertion macros instead of going through the generic dispatch */
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue; /* negative row index: silently skipped, as in MatSetValues() */
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        /* Locally owned row: set up search state for the diagonal (1) and off-diagonal (2) blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            /* column lands in the diagonal block */
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            /* off-diagonal block: translate global column to local via the colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* unknown column in an assembled matrix: fall back to disassembled (global-index) form */
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash for communication at assembly time */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}