#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also
    automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
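/*
   Hedged usage sketch (added for illustration, not part of the original source; the function
   name and the preallocation counts are made up): how a caller typically follows the
   recommendation above of invoking both preallocation routines so the same code works on one
   process (MATSEQAIJ) and on many (MATMPIAIJ).  Guarded by #if 0 so it is never compiled here.
*/
#if 0
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);                        /* resolves to MATSEQAIJ or MATMPIAIJ */
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);                        /* honors -mat_type aij               */
  ierr = MatSeqAIJSetPreallocation(*A,5,NULL);CHKERRQ(ierr);         /* takes effect on 1 process          */
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr);  /* takes effect on >1 process         */
  PetscFunctionReturn(0);
}
#endif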
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it it is not scalable (each process
    has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
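/*
   Worked illustration of the colmap built above (added for clarity; the numbers are made up).
   In the non-CTABLE branch, if the off-diagonal block B has garray = {3,7,12}, then
   colmap[3] = 1, colmap[7] = 2, colmap[12] = 3 and every other entry is 0; a global column g
   therefore maps to local column colmap[g]-1, and colmap[g] == 0 means "not present on this
   process".  The PETSC_USE_CTABLE branch keeps the same 1-based convention through
   PetscTableAdd()/PetscTableFind().
*/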
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value;   \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    }  \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol2)  low2 = 0;     \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required by the macros */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
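/*
   Summary of the off-process path through MatSetValues_MPIAIJ() above (added for clarity, no
   new functionality): a value whose row is owned by another process is recorded in mat->stash;
   MatAssemblyBegin_MPIAIJ() below starts the communication that ships each stashed
   (row, column, value) triple to its owner, and MatAssemblyEnd_MPIAIJ() replays the received
   triples through MatSetValues_MPIAIJ() before assembling the diagonal (A) and off-diagonal (B)
   blocks.
*/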
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *lrows;
  PetscInt       r, len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare row and column layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
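/*
   Hedged usage sketch (added for illustration, not part of the original source; the function
   name and row indices are made up): imposing Dirichlet-type conditions with MatZeroRows().
   Passing the solution vector x and right-hand side b makes the routine set
   b[row] = diag*x[row] for every zeroed row, matching the "fix right hand side" block in
   MatZeroRows_MPIAIJ() above.  Guarded by #if 0 so it is never compiled here.
*/
#if 0
static PetscErrorCode ExampleZeroDirichletRows(Mat A,Vec x,Vec b)
{
  PetscErrorCode ierr;
  PetscInt       rows[] = {0,10,25};   /* made-up global row indices */

  PetscFunctionBegin;
  ierr = MatZeroRows(A,3,rows,1.0,x,b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif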
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
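/*
   Structure of the parallel matrix-vector product above (a clarifying summary, not new
   functionality): with the local splitting A = [A_d  B_o], where A_d holds the locally owned
   columns and B_o the off-process columns renumbered through garray,

       y = A_d * x_local + B_o * x_ghost,

   the scatter filling the ghost values a->lvec is started before the local product with A_d
   and completed just before the product with B_o, so communication is overlapped with local
   computation.  MatMultAdd_MPIAIJ() below uses the same pattern with an added vector.
*/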
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
    This only works correctly for square matrices where the subblock A->A is the
    diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
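/*
   Layout produced by MatView_MPIAIJ_Binary() below (a clarifying summary derived from the code,
   not a normative format description): a 4-entry header (MAT_FILE_CLASSID, global rows, global
   columns, total nonzeros), then all row lengths, then all global column indices, then all
   numerical values; each block is gathered to process 0 under the viewer's flow control and
   written in owner order.
*/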
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* this process needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax  = PetscMax(nmax,nz[i]);
        nmin  = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
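/*
   What the SOR_LOCAL_* branches below implement (a clarifying summary, not new functionality):
   each outer iteration refreshes the ghost values and then performs a purely local sweep,

       bb1 = bb - B_o * x_ghost,      x <- SOR(A_d, bb1, omega),

   i.e. a processor-block relaxation using only the diagonal block A_d; a true parallel SOR
   that couples across process boundaries is not supported and raises an error.
*/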
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1430 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1431 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1432 if (iascii || isdraw || isbinary || issocket) { 1433 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1434 } 1435 PetscFunctionReturn(0); 1436 } 1437 1438 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1439 { 1440 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1441 PetscErrorCode ierr; 1442 Vec bb1 = 0; 1443 PetscBool hasop; 1444 1445 PetscFunctionBegin; 1446 if (flag == SOR_APPLY_UPPER) { 1447 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 1451 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1452 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1453 } 1454 1455 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1456 if (flag & SOR_ZERO_INITIAL_GUESS) { 1457 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1458 its--; 1459 } 1460 1461 while (its--) { 1462 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1463 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1464 1465 /* update rhs: bb1 = bb - B*x */ 1466 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1467 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1468 1469 /* local sweep */ 1470 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1471 } 1472 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1473 if (flag & SOR_ZERO_INITIAL_GUESS) { 1474 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1475 its--; 1476 } 1477 while (its--) { 1478 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1479 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1480 1481 /* update rhs: bb1 = bb - B*x */ 1482 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1483 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1484 1485 /* local sweep */ 1486 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1487 } 1488 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1489 if (flag & SOR_ZERO_INITIAL_GUESS) { 1490 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1491 its--; 1492 } 1493 while (its--) { 1494 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1495 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 1497 /* update rhs: bb1 = bb - B*x */ 1498 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1499 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1500 1501 /* local sweep */ 1502 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1503 } 1504 } else if (flag & SOR_EISENSTAT) { 1505 Vec xx1; 1506 1507 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1508 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1509 1510 ierr = 
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1511 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1512 if (!mat->diag) { 1513 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1514 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1515 } 1516 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1517 if (hasop) { 1518 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1519 } else { 1520 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1521 } 1522 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1523 1524 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1525 1526 /* local sweep */ 1527 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1528 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1529 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1530 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1531 1532 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1533 1534 matin->factorerrortype = mat->A->factorerrortype; 1535 PetscFunctionReturn(0); 1536 } 1537 1538 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1539 { 1540 Mat aA,aB,Aperm; 1541 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1542 PetscScalar *aa,*ba; 1543 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1544 PetscSF rowsf,sf; 1545 IS parcolp = NULL; 1546 PetscBool done; 1547 PetscErrorCode ierr; 1548 1549 PetscFunctionBegin; 1550 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1551 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1552 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1553 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1554 1555 /* Invert row permutation to find out where my rows should go */ 1556 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1557 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1558 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1559 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1560 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1561 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1562 1563 /* Invert column permutation to find out where my columns should go */ 1564 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1565 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1566 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1567 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1568 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1569 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1570 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1571 1572 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1573 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1574 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1575 1576 /* Find out where my gcols should go */ 1577 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1578 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1579 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1580 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1581 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1582 ierr = 
PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1583 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1584 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1585 1586 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1587 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1588 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1589 for (i=0; i<m; i++) { 1590 PetscInt row = rdest[i],rowner; 1591 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1592 for (j=ai[i]; j<ai[i+1]; j++) { 1593 PetscInt cowner,col = cdest[aj[j]]; 1594 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1595 if (rowner == cowner) dnnz[i]++; 1596 else onnz[i]++; 1597 } 1598 for (j=bi[i]; j<bi[i+1]; j++) { 1599 PetscInt cowner,col = gcdest[bj[j]]; 1600 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1601 if (rowner == cowner) dnnz[i]++; 1602 else onnz[i]++; 1603 } 1604 } 1605 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1606 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1607 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1608 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1609 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1610 1611 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1612 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1613 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1614 for (i=0; i<m; i++) { 1615 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1616 PetscInt j0,rowlen; 1617 rowlen = ai[i+1] - ai[i]; 1618 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1619 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1620 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1621 } 1622 rowlen = bi[i+1] - bi[i]; 1623 for (j0=j=0; j<rowlen; j0=j) { 1624 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1625 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1626 } 1627 } 1628 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1629 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1630 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1631 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1632 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1633 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1634 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1635 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1636 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1637 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1638 *B = Aperm; 1639 PetscFunctionReturn(0); 1640 } 1641 1642 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1643 { 1644 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1645 PetscErrorCode ierr; 1646 1647 PetscFunctionBegin; 1648 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1649 if (ghosts) *ghosts = aij->garray; 1650 PetscFunctionReturn(0); 1651 } 1652 1653 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1654 { 1655 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1656 
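  /* The five counters exchanged below (nz_used, nz_allocated, nz_unneeded, memory, mallocs)
     are accumulated over the diagonal block A and the off-diagonal block B; for
     MAT_GLOBAL_MAX and MAT_GLOBAL_SUM they are then reduced across the communicator. */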
Mat A = mat->A,B = mat->B; 1657 PetscErrorCode ierr; 1658 PetscReal isend[5],irecv[5]; 1659 1660 PetscFunctionBegin; 1661 info->block_size = 1.0; 1662 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1663 1664 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1665 isend[3] = info->memory; isend[4] = info->mallocs; 1666 1667 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1668 1669 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1670 isend[3] += info->memory; isend[4] += info->mallocs; 1671 if (flag == MAT_LOCAL) { 1672 info->nz_used = isend[0]; 1673 info->nz_allocated = isend[1]; 1674 info->nz_unneeded = isend[2]; 1675 info->memory = isend[3]; 1676 info->mallocs = isend[4]; 1677 } else if (flag == MAT_GLOBAL_MAX) { 1678 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1679 1680 info->nz_used = irecv[0]; 1681 info->nz_allocated = irecv[1]; 1682 info->nz_unneeded = irecv[2]; 1683 info->memory = irecv[3]; 1684 info->mallocs = irecv[4]; 1685 } else if (flag == MAT_GLOBAL_SUM) { 1686 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1687 1688 info->nz_used = irecv[0]; 1689 info->nz_allocated = irecv[1]; 1690 info->nz_unneeded = irecv[2]; 1691 info->memory = irecv[3]; 1692 info->mallocs = irecv[4]; 1693 } 1694 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1695 info->fill_ratio_needed = 0; 1696 info->factor_mallocs = 0; 1697 PetscFunctionReturn(0); 1698 } 1699 1700 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1701 { 1702 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1703 PetscErrorCode ierr; 1704 1705 PetscFunctionBegin; 1706 switch (op) { 1707 case MAT_NEW_NONZERO_LOCATIONS: 1708 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1709 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1710 case MAT_KEEP_NONZERO_PATTERN: 1711 case MAT_NEW_NONZERO_LOCATION_ERR: 1712 case MAT_USE_INODES: 1713 case MAT_IGNORE_ZERO_ENTRIES: 1714 MatCheckPreallocated(A,1); 1715 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1716 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1717 break; 1718 case MAT_ROW_ORIENTED: 1719 MatCheckPreallocated(A,1); 1720 a->roworiented = flg; 1721 1722 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1723 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1724 break; 1725 case MAT_NEW_DIAGONALS: 1726 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1727 break; 1728 case MAT_IGNORE_OFF_PROC_ENTRIES: 1729 a->donotstash = flg; 1730 break; 1731 case MAT_SPD: 1732 A->spd_set = PETSC_TRUE; 1733 A->spd = flg; 1734 if (flg) { 1735 A->symmetric = PETSC_TRUE; 1736 A->structurally_symmetric = PETSC_TRUE; 1737 A->symmetric_set = PETSC_TRUE; 1738 A->structurally_symmetric_set = PETSC_TRUE; 1739 } 1740 break; 1741 case MAT_SYMMETRIC: 1742 MatCheckPreallocated(A,1); 1743 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1744 break; 1745 case MAT_STRUCTURALLY_SYMMETRIC: 1746 MatCheckPreallocated(A,1); 1747 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1748 break; 1749 case MAT_HERMITIAN: 1750 MatCheckPreallocated(A,1); 1751 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1752 break; 1753 case MAT_SYMMETRY_ETERNAL: 1754 MatCheckPreallocated(A,1); 1755 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1756 break; 1757 case MAT_SUBMAT_SINGLEIS: 1758 A->submat_singleis = flg; 1759 break; 1760 case MAT_STRUCTURE_ONLY: 1761 /* The option is handled directly by MatSetOption() */ 1762 break; 
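  /* any option not handled above is reported as unsupported below */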
1763 default: 1764 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1765 } 1766 PetscFunctionReturn(0); 1767 } 1768 1769 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1770 { 1771 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1772 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1773 PetscErrorCode ierr; 1774 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1775 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1776 PetscInt *cmap,*idx_p; 1777 1778 PetscFunctionBegin; 1779 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1780 mat->getrowactive = PETSC_TRUE; 1781 1782 if (!mat->rowvalues && (idx || v)) { 1783 /* 1784 allocate enough space to hold information from the longest row. 1785 */ 1786 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1787 PetscInt max = 1,tmp; 1788 for (i=0; i<matin->rmap->n; i++) { 1789 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1790 if (max < tmp) max = tmp; 1791 } 1792 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1793 } 1794 1795 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1796 lrow = row - rstart; 1797 1798 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1799 if (!v) {pvA = 0; pvB = 0;} 1800 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1801 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1802 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1803 nztot = nzA + nzB; 1804 1805 cmap = mat->garray; 1806 if (v || idx) { 1807 if (nztot) { 1808 /* Sort by increasing column numbers, assuming A and B already sorted */ 1809 PetscInt imark = -1; 1810 if (v) { 1811 *v = v_p = mat->rowvalues; 1812 for (i=0; i<nzB; i++) { 1813 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1814 else break; 1815 } 1816 imark = i; 1817 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1818 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1819 } 1820 if (idx) { 1821 *idx = idx_p = mat->rowindices; 1822 if (imark > -1) { 1823 for (i=0; i<imark; i++) { 1824 idx_p[i] = cmap[cworkB[i]]; 1825 } 1826 } else { 1827 for (i=0; i<nzB; i++) { 1828 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1829 else break; 1830 } 1831 imark = i; 1832 } 1833 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1834 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1835 } 1836 } else { 1837 if (idx) *idx = 0; 1838 if (v) *v = 0; 1839 } 1840 } 1841 *nz = nztot; 1842 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1843 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1844 PetscFunctionReturn(0); 1845 } 1846 1847 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1848 { 1849 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1850 1851 PetscFunctionBegin; 1852 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1853 aij->getrowactive = PETSC_FALSE; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1858 { 1859 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1860 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1861 PetscErrorCode ierr; 1862 PetscInt i,j,cstart = mat->cmap->rstart; 1863 PetscReal sum = 0.0; 1864 MatScalar *v; 1865 1866 
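  /* On one process the norm of the diagonal block is the answer; otherwise the Frobenius
     norm sums the squared magnitudes over both local blocks and reduces with MPI_SUM, the
     1-norm assembles global column sums, and the infinity norm reduces the local maximum
     row sum with MPI_MAX. */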
PetscFunctionBegin; 1867 if (aij->size == 1) { 1868 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1869 } else { 1870 if (type == NORM_FROBENIUS) { 1871 v = amat->a; 1872 for (i=0; i<amat->nz; i++) { 1873 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1874 } 1875 v = bmat->a; 1876 for (i=0; i<bmat->nz; i++) { 1877 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1878 } 1879 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1880 *norm = PetscSqrtReal(*norm); 1881 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1882 } else if (type == NORM_1) { /* max column norm */ 1883 PetscReal *tmp,*tmp2; 1884 PetscInt *jj,*garray = aij->garray; 1885 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1886 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1887 *norm = 0.0; 1888 v = amat->a; jj = amat->j; 1889 for (j=0; j<amat->nz; j++) { 1890 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1891 } 1892 v = bmat->a; jj = bmat->j; 1893 for (j=0; j<bmat->nz; j++) { 1894 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1895 } 1896 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1897 for (j=0; j<mat->cmap->N; j++) { 1898 if (tmp2[j] > *norm) *norm = tmp2[j]; 1899 } 1900 ierr = PetscFree(tmp);CHKERRQ(ierr); 1901 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1902 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1903 } else if (type == NORM_INFINITY) { /* max row norm */ 1904 PetscReal ntemp = 0.0; 1905 for (j=0; j<aij->A->rmap->n; j++) { 1906 v = amat->a + amat->i[j]; 1907 sum = 0.0; 1908 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1909 sum += PetscAbsScalar(*v); v++; 1910 } 1911 v = bmat->a + bmat->i[j]; 1912 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1913 sum += PetscAbsScalar(*v); v++; 1914 } 1915 if (sum > ntemp) ntemp = sum; 1916 } 1917 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1918 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1919 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1920 } 1921 PetscFunctionReturn(0); 1922 } 1923 1924 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1925 { 1926 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1927 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1928 PetscErrorCode ierr; 1929 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1930 PetscInt cstart = A->cmap->rstart,ncol; 1931 Mat B; 1932 MatScalar *array; 1933 1934 PetscFunctionBegin; 1935 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1936 1937 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1938 ai = Aloc->i; aj = Aloc->j; 1939 bi = Bloc->i; bj = Bloc->j; 1940 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1941 PetscInt *d_nnz,*g_nnz,*o_nnz; 1942 PetscSFNode *oloc; 1943 PETSC_UNUSED PetscSF sf; 1944 1945 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1946 /* compute d_nnz for preallocation */ 1947 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1948 for (i=0; i<ai[ma]; i++) { 1949 d_nnz[aj[i]]++; 1950 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1951 } 1952 /* compute local off-diagonal contributions */ 1953 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1954 for (i=0; 
i<bi[ma]; i++) g_nnz[bj[i]]++; 1955 /* map those to global */ 1956 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1957 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1958 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1959 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1960 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1961 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1962 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1963 1964 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1965 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1966 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1967 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1968 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1969 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1970 } else { 1971 B = *matout; 1972 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1973 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1974 } 1975 1976 /* copy over the A part */ 1977 array = Aloc->a; 1978 row = A->rmap->rstart; 1979 for (i=0; i<ma; i++) { 1980 ncol = ai[i+1]-ai[i]; 1981 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1982 row++; 1983 array += ncol; aj += ncol; 1984 } 1985 aj = Aloc->j; 1986 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1987 1988 /* copy over the B part */ 1989 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1990 array = Bloc->a; 1991 row = A->rmap->rstart; 1992 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1993 cols_tmp = cols; 1994 for (i=0; i<mb; i++) { 1995 ncol = bi[i+1]-bi[i]; 1996 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1997 row++; 1998 array += ncol; cols_tmp += ncol; 1999 } 2000 ierr = PetscFree(cols);CHKERRQ(ierr); 2001 2002 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2003 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2004 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2005 *matout = B; 2006 } else { 2007 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2013 { 2014 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2015 Mat a = aij->A,b = aij->B; 2016 PetscErrorCode ierr; 2017 PetscInt s1,s2,s3; 2018 2019 PetscFunctionBegin; 2020 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2021 if (rr) { 2022 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2023 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2024 /* Overlap communication with computation. 
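       The forward scatter of rr into aij->lvec is only started here; it is completed
       further below, after the diagonal block has been scaled, so the communication
       overlaps the local scaling work.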
*/ 2025 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2026 } 2027 if (ll) { 2028 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2029 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2030 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2031 } 2032 /* scale the diagonal block */ 2033 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2034 2035 if (rr) { 2036 /* Do a scatter end and then right scale the off-diagonal block */ 2037 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2038 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2039 } 2040 PetscFunctionReturn(0); 2041 } 2042 2043 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2044 { 2045 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2046 PetscErrorCode ierr; 2047 2048 PetscFunctionBegin; 2049 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2050 PetscFunctionReturn(0); 2051 } 2052 2053 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2054 { 2055 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2056 Mat a,b,c,d; 2057 PetscBool flg; 2058 PetscErrorCode ierr; 2059 2060 PetscFunctionBegin; 2061 a = matA->A; b = matA->B; 2062 c = matB->A; d = matB->B; 2063 2064 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2065 if (flg) { 2066 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2067 } 2068 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2073 { 2074 PetscErrorCode ierr; 2075 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2076 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2077 2078 PetscFunctionBegin; 2079 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2080 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2081 /* because of the column compression in the off-processor part of the matrix a->B, 2082 the number of columns in a->B and b->B may be different, hence we cannot call 2083 the MatCopy() directly on the two parts. If need be, we can provide a more 2084 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2085 then copying the submatrices */ 2086 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2087 } else { 2088 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2089 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2090 } 2091 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2092 PetscFunctionReturn(0); 2093 } 2094 2095 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2096 { 2097 PetscErrorCode ierr; 2098 2099 PetscFunctionBegin; 2100 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2101 PetscFunctionReturn(0); 2102 } 2103 2104 /* 2105 Computes the number of nonzeros per row needed for preallocation when X and Y 2106 have different nonzero structure. 
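    The count for each row is the size of the union of the two sorted global column lists:
    the lists are merged below with a two-pointer sweep that skips duplicates. For example,
    if a row of X has global columns {0,3,7} and the same row of Y has {3,5}, the union
    {0,3,5,7} gives nnz = 4 for that row.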
2107 */ 2108 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2109 { 2110 PetscInt i,j,k,nzx,nzy; 2111 2112 PetscFunctionBegin; 2113 /* Set the number of nonzeros in the new matrix */ 2114 for (i=0; i<m; i++) { 2115 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2116 nzx = xi[i+1] - xi[i]; 2117 nzy = yi[i+1] - yi[i]; 2118 nnz[i] = 0; 2119 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2120 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2121 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2122 nnz[i]++; 2123 } 2124 for (; k<nzy; k++) nnz[i]++; 2125 } 2126 PetscFunctionReturn(0); 2127 } 2128 2129 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2130 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2131 { 2132 PetscErrorCode ierr; 2133 PetscInt m = Y->rmap->N; 2134 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2135 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2136 2137 PetscFunctionBegin; 2138 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2143 { 2144 PetscErrorCode ierr; 2145 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2146 PetscBLASInt bnz,one=1; 2147 Mat_SeqAIJ *x,*y; 2148 2149 PetscFunctionBegin; 2150 if (str == SAME_NONZERO_PATTERN) { 2151 PetscScalar alpha = a; 2152 x = (Mat_SeqAIJ*)xx->A->data; 2153 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2154 y = (Mat_SeqAIJ*)yy->A->data; 2155 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2156 x = (Mat_SeqAIJ*)xx->B->data; 2157 y = (Mat_SeqAIJ*)yy->B->data; 2158 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2159 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2160 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2161 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2162 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2163 } else { 2164 Mat B; 2165 PetscInt *nnz_d,*nnz_o; 2166 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2167 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2168 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2169 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2170 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2171 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2172 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2173 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2174 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2175 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2176 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2177 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2178 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2179 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2180 } 2181 PetscFunctionReturn(0); 2182 } 2183 2184 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2185 2186 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2187 { 2188 #if defined(PETSC_USE_COMPLEX) 2189 PetscErrorCode ierr; 2190 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
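  /* Compute row minima separately over the diagonal block (local columns offset by cstart)
     and the off-diagonal block (columns mapped through garray), then combine the two
     candidates entry by entry below. */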
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
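  /* Fill both the diagonal and off-diagonal blocks with random entries, then re-assemble the parallel matrix */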
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij  = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d += rstart;
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
MatGetDiagonal_MPIAIJ, 2514 MatDiagonalScale_MPIAIJ, 2515 MatNorm_MPIAIJ, 2516 /*20*/ MatAssemblyBegin_MPIAIJ, 2517 MatAssemblyEnd_MPIAIJ, 2518 MatSetOption_MPIAIJ, 2519 MatZeroEntries_MPIAIJ, 2520 /*24*/ MatZeroRows_MPIAIJ, 2521 0, 2522 0, 2523 0, 2524 0, 2525 /*29*/ MatSetUp_MPIAIJ, 2526 0, 2527 0, 2528 MatGetDiagonalBlock_MPIAIJ, 2529 0, 2530 /*34*/ MatDuplicate_MPIAIJ, 2531 0, 2532 0, 2533 0, 2534 0, 2535 /*39*/ MatAXPY_MPIAIJ, 2536 MatCreateSubMatrices_MPIAIJ, 2537 MatIncreaseOverlap_MPIAIJ, 2538 MatGetValues_MPIAIJ, 2539 MatCopy_MPIAIJ, 2540 /*44*/ MatGetRowMax_MPIAIJ, 2541 MatScale_MPIAIJ, 2542 MatShift_MPIAIJ, 2543 MatDiagonalSet_MPIAIJ, 2544 MatZeroRowsColumns_MPIAIJ, 2545 /*49*/ MatSetRandom_MPIAIJ, 2546 0, 2547 0, 2548 0, 2549 0, 2550 /*54*/ MatFDColoringCreate_MPIXAIJ, 2551 0, 2552 MatSetUnfactored_MPIAIJ, 2553 MatPermute_MPIAIJ, 2554 0, 2555 /*59*/ MatCreateSubMatrix_MPIAIJ, 2556 MatDestroy_MPIAIJ, 2557 MatView_MPIAIJ, 2558 0, 2559 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2560 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2561 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2562 0, 2563 0, 2564 0, 2565 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2566 MatGetRowMinAbs_MPIAIJ, 2567 0, 2568 0, 2569 0, 2570 0, 2571 /*75*/ MatFDColoringApply_AIJ, 2572 MatSetFromOptions_MPIAIJ, 2573 0, 2574 0, 2575 MatFindZeroDiagonals_MPIAIJ, 2576 /*80*/ 0, 2577 0, 2578 0, 2579 /*83*/ MatLoad_MPIAIJ, 2580 MatIsSymmetric_MPIAIJ, 2581 0, 2582 0, 2583 0, 2584 0, 2585 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2586 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2587 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2588 MatPtAP_MPIAIJ_MPIAIJ, 2589 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2590 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2591 0, 2592 0, 2593 0, 2594 0, 2595 /*99*/ 0, 2596 0, 2597 0, 2598 MatConjugate_MPIAIJ, 2599 0, 2600 /*104*/MatSetValuesRow_MPIAIJ, 2601 MatRealPart_MPIAIJ, 2602 MatImaginaryPart_MPIAIJ, 2603 0, 2604 0, 2605 /*109*/0, 2606 0, 2607 MatGetRowMin_MPIAIJ, 2608 0, 2609 MatMissingDiagonal_MPIAIJ, 2610 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2611 0, 2612 MatGetGhosts_MPIAIJ, 2613 0, 2614 0, 2615 /*119*/0, 2616 0, 2617 0, 2618 0, 2619 MatGetMultiProcBlock_MPIAIJ, 2620 /*124*/MatFindNonzeroRows_MPIAIJ, 2621 MatGetColumnNorms_MPIAIJ, 2622 MatInvertBlockDiagonal_MPIAIJ, 2623 0, 2624 MatCreateSubMatricesMPI_MPIAIJ, 2625 /*129*/0, 2626 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2627 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2628 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2629 0, 2630 /*134*/0, 2631 0, 2632 MatRARt_MPIAIJ_MPIAIJ, 2633 0, 2634 0, 2635 /*139*/MatSetBlockSizes_MPIAIJ, 2636 0, 2637 0, 2638 MatFDColoringSetUp_MPIXAIJ, 2639 MatFindOffBlockDiagonalEntries_MPIAIJ, 2640 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2641 }; 2642 2643 /* ----------------------------------------------------------------------------------------*/ 2644 2645 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2646 { 2647 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2648 PetscErrorCode ierr; 2649 2650 PetscFunctionBegin; 2651 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2652 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2653 PetscFunctionReturn(0); 2654 } 2655 2656 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2657 { 2658 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2659 PetscErrorCode ierr; 2660 2661 PetscFunctionBegin; 2662 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2663 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2664 PetscFunctionReturn(0); 2665 } 2666 2667 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2668 { 2669 Mat_MPIAIJ *b; 2670 PetscErrorCode ierr; 2671 2672 PetscFunctionBegin; 2673 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2674 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2675 b = (Mat_MPIAIJ*)B->data; 2676 2677 #if defined(PETSC_USE_CTABLE) 2678 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2679 #else 2680 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2681 #endif 2682 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2683 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2684 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2685 2686 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2687 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2688 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2689 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2690 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2691 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2692 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2693 2694 if (!B->preallocated) { 2695 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2696 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2697 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2698 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2699 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2700 } 2701 2702 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2703 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2704 B->preallocated = PETSC_TRUE; 2705 B->was_assembled = PETSC_FALSE; 2706 B->assembled = PETSC_FALSE;; 2707 PetscFunctionReturn(0); 2708 } 2709 2710 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2711 { 2712 Mat_MPIAIJ *b; 2713 PetscErrorCode ierr; 2714 2715 PetscFunctionBegin; 2716 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2717 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2718 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2719 b = (Mat_MPIAIJ*)B->data; 2720 2721 #if defined(PETSC_USE_CTABLE) 2722 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2723 #else 2724 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2725 #endif 2726 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2727 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2728 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2729 2730 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2731 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2732 B->preallocated = PETSC_TRUE; 2733 B->was_assembled = PETSC_FALSE; 2734 B->assembled = PETSC_FALSE; 2735 PetscFunctionReturn(0); 2736 } 2737 2738 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2739 { 2740 Mat mat; 2741 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2742 PetscErrorCode ierr; 2743 2744 PetscFunctionBegin; 2745 *newmat = 0; 2746 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2747 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2748 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2749 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2750 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2751 a = (Mat_MPIAIJ*)mat->data; 2752 2753 mat->factortype = matin->factortype; 2754 mat->assembled = PETSC_TRUE; 2755 mat->insertmode = NOT_SET_VALUES; 2756 mat->preallocated = PETSC_TRUE; 2757 2758 a->size = oldmat->size; 2759 a->rank = oldmat->rank; 2760 a->donotstash 
= oldmat->donotstash; 2761 a->roworiented = oldmat->roworiented; 2762 a->rowindices = 0; 2763 a->rowvalues = 0; 2764 a->getrowactive = PETSC_FALSE; 2765 2766 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2767 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2768 2769 if (oldmat->colmap) { 2770 #if defined(PETSC_USE_CTABLE) 2771 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2772 #else 2773 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2774 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2775 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2776 #endif 2777 } else a->colmap = 0; 2778 if (oldmat->garray) { 2779 PetscInt len; 2780 len = oldmat->B->cmap->n; 2781 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2782 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2783 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2784 } else a->garray = 0; 2785 2786 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2787 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2788 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2789 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2790 2791 if (oldmat->Mvctx_mpi1) { 2792 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2793 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2794 } 2795 2796 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2797 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2798 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2799 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2800 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2801 *newmat = mat; 2802 PetscFunctionReturn(0); 2803 } 2804 2805 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2806 { 2807 PetscScalar *vals,*svals; 2808 MPI_Comm comm; 2809 PetscErrorCode ierr; 2810 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2811 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2812 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2813 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2814 PetscInt cend,cstart,n,*rowners; 2815 int fd; 2816 PetscInt bs = newMat->rmap->bs; 2817 2818 PetscFunctionBegin; 2819 /* force binary viewer to load .info file if it has not yet done so */ 2820 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2821 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2822 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2823 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2824 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2825 if (!rank) { 2826 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2827 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2828 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2829 } 2830 2831 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2832 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2833 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2834 if (bs < 0) bs = 1; 2835 2836 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2837 M = header[1]; N = header[2]; 2838 2839 /* If global sizes are set, check if they are consistent with that given in the file */ 2840 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2841 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2842 2843 /* determine ownership of all (block) rows */ 2844 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2845 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2846 else m = newMat->rmap->n; /* Set by user */ 2847 2848 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2849 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2850 2851 /* First process needs enough room for process with most rows */ 2852 if (!rank) { 2853 mmax = rowners[1]; 2854 for (i=2; i<=size; i++) { 2855 mmax = PetscMax(mmax, rowners[i]); 2856 } 2857 } else mmax = -1; /* unused, but compilers complain */ 2858 2859 rowners[0] = 0; 2860 for (i=2; i<=size; i++) { 2861 rowners[i] += rowners[i-1]; 2862 } 2863 rstart = rowners[rank]; 2864 rend = rowners[rank+1]; 2865 2866 /* distribute row lengths to all processors */ 2867 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2868 if (!rank) { 2869 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2870 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2871 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2872 for (j=0; j<m; j++) { 2873 procsnz[0] += ourlens[j]; 2874 } 2875 for (i=1; i<size; i++) { 2876 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2877 /* calculate the number of nonzeros on each processor */ 2878 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2879 procsnz[i] += rowlengths[j]; 2880 } 2881 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2882 } 2883 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2884 } else { 2885 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2886 } 2887 2888 if (!rank) { 2889 /* determine max buffer needed and allocate it */ 2890 maxnz = 0; 2891 for (i=0; i<size; i++) { 2892 maxnz = PetscMax(maxnz,procsnz[i]); 2893 } 2894 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2895 2896 /* read in my part of the matrix column indices */ 2897 nz = procsnz[0]; 2898 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2899 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2900 2901 /* read in every one elses and ship off */ 2902 for (i=1; i<size; i++) { 2903 nz = procsnz[i]; 2904 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2905 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2906 } 2907 ierr = PetscFree(cols);CHKERRQ(ierr); 2908 } else { 2909 /* determine buffer space needed for message */ 2910 nz = 0; 2911 for (i=0; i<m; i++) { 2912 nz += ourlens[i]; 2913 } 2914 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2915 2916 /* receive message of column indices*/ 2917 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2918 } 2919 2920 /* 
determine column ownership if matrix is not square */ 2921 if (N != M) { 2922 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2923 else n = newMat->cmap->n; 2924 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2925 cstart = cend - n; 2926 } else { 2927 cstart = rstart; 2928 cend = rend; 2929 n = cend - cstart; 2930 } 2931 2932 /* loop over local rows, determining number of off diagonal entries */ 2933 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2934 jj = 0; 2935 for (i=0; i<m; i++) { 2936 for (j=0; j<ourlens[i]; j++) { 2937 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2938 jj++; 2939 } 2940 } 2941 2942 for (i=0; i<m; i++) { 2943 ourlens[i] -= offlens[i]; 2944 } 2945 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2946 2947 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2948 2949 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2950 2951 for (i=0; i<m; i++) { 2952 ourlens[i] += offlens[i]; 2953 } 2954 2955 if (!rank) { 2956 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2957 2958 /* read in my part of the matrix numerical values */ 2959 nz = procsnz[0]; 2960 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2961 2962 /* insert into matrix */ 2963 jj = rstart; 2964 smycols = mycols; 2965 svals = vals; 2966 for (i=0; i<m; i++) { 2967 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2968 smycols += ourlens[i]; 2969 svals += ourlens[i]; 2970 jj++; 2971 } 2972 2973 /* read in other processors and ship out */ 2974 for (i=1; i<size; i++) { 2975 nz = procsnz[i]; 2976 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2977 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2978 } 2979 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2980 } else { 2981 /* receive numeric values */ 2982 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2983 2984 /* receive message of values*/ 2985 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2986 2987 /* insert into matrix */ 2988 jj = rstart; 2989 smycols = mycols; 2990 svals = vals; 2991 for (i=0; i<m; i++) { 2992 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2993 smycols += ourlens[i]; 2994 svals += ourlens[i]; 2995 jj++; 2996 } 2997 } 2998 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2999 ierr = PetscFree(vals);CHKERRQ(ierr); 3000 ierr = PetscFree(mycols);CHKERRQ(ierr); 3001 ierr = PetscFree(rowners);CHKERRQ(ierr); 3002 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3003 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3004 PetscFunctionReturn(0); 3005 } 3006 3007 /* Not scalable because of ISAllGather() unless getting all columns. 
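   ISAllGather() leaves a copy of the complete column index set on every process, so memory
   per process grows with the global number of selected columns; the all-columns case is
   detected below and handled with a stride IS instead of a gather.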
*/ 3008 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3009 { 3010 PetscErrorCode ierr; 3011 IS iscol_local; 3012 PetscBool isstride; 3013 PetscMPIInt lisstride=0,gisstride; 3014 3015 PetscFunctionBegin; 3016 /* check if we are grabbing all columns*/ 3017 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3018 3019 if (isstride) { 3020 PetscInt start,len,mstart,mlen; 3021 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3022 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3023 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3024 if (mstart == start && mlen-mstart == len) lisstride = 1; 3025 } 3026 3027 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3028 if (gisstride) { 3029 PetscInt N; 3030 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3031 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3032 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3033 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3034 } else { 3035 PetscInt cbs; 3036 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3037 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3038 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3039 } 3040 3041 *isseq = iscol_local; 3042 PetscFunctionReturn(0); 3043 } 3044 3045 /* 3046 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3047 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3048 3049 Input Parameters: 3050 mat - matrix 3051 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3052 i.e., mat->rstart <= isrow[i] < mat->rend 3053 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3054 i.e., mat->cstart <= iscol[i] < mat->cend 3055 Output Parameter: 3056 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3057 iscol_o - sequential column index set for retrieving mat->B 3058 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3059 */ 3060 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3061 { 3062 PetscErrorCode ierr; 3063 Vec x,cmap; 3064 const PetscInt *is_idx; 3065 PetscScalar *xarray,*cmaparray; 3066 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3067 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3068 Mat B=a->B; 3069 Vec lvec=a->lvec,lcmap; 3070 PetscInt i,cstart,cend,Bn=B->cmap->N; 3071 MPI_Comm comm; 3072 VecScatter Mvctx=a->Mvctx; 3073 3074 PetscFunctionBegin; 3075 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3076 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3077 3078 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3079 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3080 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3081 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3082 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3083 3084 /* Get start indices */ 3085 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3086 isstart -= ncols; 3087 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3088 3089 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3090 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3091 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3092 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3093 for (i=0; i<ncols; i++) { 3094 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3095 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3096 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3097 } 3098 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3099 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3100 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3101 3102 /* Get iscol_d */ 3103 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3104 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3105 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3106 3107 /* Get isrow_d */ 3108 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3109 rstart = mat->rmap->rstart; 3110 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3111 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3112 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3113 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3114 3115 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3116 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3117 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3118 3119 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3120 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3121 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3122 3123 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3124 3125 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3126 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3127 3128 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3129 /* off-process column indices */ 3130 count = 0; 3131 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3132 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3133 3134 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3135 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3136 for (i=0; i<Bn; i++) { 3137 if (PetscRealPart(xarray[i]) > -1.0) { 3138 idx[count] = i; /* local column index in off-diagonal part B */ 3139 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3140 count++; 3141 } 3142 } 3143 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3144 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3145 3146 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3147 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3148 3149 ierr = PetscFree(idx);CHKERRQ(ierr); 3150 *garray = cmap1; 3151 3152 ierr = VecDestroy(&x);CHKERRQ(ierr); 3153 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3154 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3155 PetscFunctionReturn(0); 3156 } 3157 3158 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3159 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3160 { 3161 PetscErrorCode ierr; 3162 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3163 Mat M = NULL; 3164 MPI_Comm comm; 3165 IS iscol_d,isrow_d,iscol_o; 3166 Mat Asub = NULL,Bsub = NULL; 3167 PetscInt n; 3168 3169 PetscFunctionBegin; 3170 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3171 3172 if (call == MAT_REUSE_MATRIX) { 3173 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3174 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3175 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3176 3177 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3178 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3179 3180 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3181 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3182 3183 /* Update diagonal and off-diagonal portions of submat */ 3184 asub = (Mat_MPIAIJ*)(*submat)->data; 3185 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3186 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3187 if (n) { 3188 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3189 } 3190 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3191 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3192 3193 } else { /* call == MAT_INITIAL_MATRIX) */ 3194 const PetscInt *garray; 3195 PetscInt BsubN; 3196 3197 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3198 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3199 3200 /* Create local submatrices Asub and Bsub */ 3201 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3202 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3203 3204 /* Create submatrix M */ 3205 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3206 3207 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3208 asub = (Mat_MPIAIJ*)M->data; 3209 3210 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3211 n = asub->B->cmap->N; 3212 if (BsubN > n) { 3213 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3214 const PetscInt *idx; 3215 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3216 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3217 3218 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3219 j = 0; 3220 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3221 for (i=0; i<n; i++) { 3222 if (j >= BsubN) break; 3223 while (subgarray[i] > garray[j]) j++; 3224 3225 if (subgarray[i] == garray[j]) { 3226 idx_new[i] = idx[j++]; 3227 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3228 } 3229 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3230 3231 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3232 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3233 3234 } else if (BsubN < n) { 3235 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3236 } 3237 3238 ierr = PetscFree(garray);CHKERRQ(ierr); 3239 *submat = M; 3240 3241 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3242 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3243 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3244 3245 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3246 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3247 3248 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3249 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3250 } 3251 PetscFunctionReturn(0); 3252 } 3253 3254 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3255 { 3256 PetscErrorCode ierr; 3257 IS iscol_local=NULL,isrow_d; 3258 PetscInt csize; 3259 PetscInt n,i,j,start,end; 3260 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3261 MPI_Comm comm; 3262 3263 PetscFunctionBegin; 3264 /* If isrow has same processor distribution as mat, 3265 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3266 if (call == MAT_REUSE_MATRIX) { 3267 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3268 if (isrow_d) { 3269 sameRowDist = PETSC_TRUE; 3270 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3271 } else { 3272 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3273 if (iscol_local) { 3274 sameRowDist = PETSC_TRUE; 3275 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3276 } 3277 } 3278 } else { 3279 /* Check if isrow has same processor distribution as mat */ 3280 sameDist[0] 
= PETSC_FALSE; 3281 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3282 if (!n) { 3283 sameDist[0] = PETSC_TRUE; 3284 } else { 3285 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3286 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3287 if (i >= start && j < end) { 3288 sameDist[0] = PETSC_TRUE; 3289 } 3290 } 3291 3292 /* Check if iscol has same processor distribution as mat */ 3293 sameDist[1] = PETSC_FALSE; 3294 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3295 if (!n) { 3296 sameDist[1] = PETSC_TRUE; 3297 } else { 3298 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3299 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3300 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3301 } 3302 3303 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3304 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3305 sameRowDist = tsameDist[0]; 3306 } 3307 3308 if (sameRowDist) { 3309 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3310 /* isrow and iscol have same processor distribution as mat */ 3311 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3312 PetscFunctionReturn(0); 3313 } else { /* sameRowDist */ 3314 /* isrow has same processor distribution as mat */ 3315 if (call == MAT_INITIAL_MATRIX) { 3316 PetscBool sorted; 3317 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3318 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3319 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3320 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3321 3322 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3323 if (sorted) { 3324 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3325 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3326 PetscFunctionReturn(0); 3327 } 3328 } else { /* call == MAT_REUSE_MATRIX */ 3329 IS iscol_sub; 3330 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3331 if (iscol_sub) { 3332 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3333 PetscFunctionReturn(0); 3334 } 3335 } 3336 } 3337 } 3338 3339 /* General case: iscol -> iscol_local which has global size of iscol */ 3340 if (call == MAT_REUSE_MATRIX) { 3341 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3342 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3343 } else { 3344 if (!iscol_local) { 3345 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3346 } 3347 } 3348 3349 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3350 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3351 3352 if (call == MAT_INITIAL_MATRIX) { 3353 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3354 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3355 } 3356 PetscFunctionReturn(0); 3357 } 3358 3359 /*@C 3360 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3361 and "off-diagonal" part of the matrix in CSR format. 3362 3363 Collective on MPI_Comm 3364 3365 Input Parameters: 3366 + comm - MPI communicator 3367 . 
A - "diagonal" portion of matrix 3368 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3369 - garray - global index of B columns 3370 3371 Output Parameter: 3372 . mat - the matrix, with input A as its local diagonal matrix 3373 Level: advanced 3374 3375 Notes: 3376 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3377 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3378 3379 .seealso: MatCreateMPIAIJWithSplitArrays() 3380 @*/ 3381 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3382 { 3383 PetscErrorCode ierr; 3384 Mat_MPIAIJ *maij; 3385 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3386 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3387 PetscScalar *oa=b->a; 3388 Mat Bnew; 3389 PetscInt m,n,N; 3390 3391 PetscFunctionBegin; 3392 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3393 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3394 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3395 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3396 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3397 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3398 3399 /* Get global columns of mat */ 3400 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3401 3402 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3403 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3404 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3405 maij = (Mat_MPIAIJ*)(*mat)->data; 3406 3407 (*mat)->preallocated = PETSC_TRUE; 3408 3409 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3410 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3411 3412 /* Set A as diagonal portion of *mat */ 3413 maij->A = A; 3414 3415 nz = oi[m]; 3416 for (i=0; i<nz; i++) { 3417 col = oj[i]; 3418 oj[i] = garray[col]; 3419 } 3420 3421 /* Set Bnew as off-diagonal portion of *mat */ 3422 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3423 bnew = (Mat_SeqAIJ*)Bnew->data; 3424 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3425 maij->B = Bnew; 3426 3427 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3428 3429 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3430 b->free_a = PETSC_FALSE; 3431 b->free_ij = PETSC_FALSE; 3432 ierr = MatDestroy(&B);CHKERRQ(ierr); 3433 3434 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3435 bnew->free_a = PETSC_TRUE; 3436 bnew->free_ij = PETSC_TRUE; 3437 3438 /* condense columns of maij->B */ 3439 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3440 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3441 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3442 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3443 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3444 PetscFunctionReturn(0); 3445 } 3446 3447 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3448 
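/*
   The MatCreateSubMatrix_MPIAIJ* routines above and below back the public MatCreateSubMatrix() for
   MPIAIJ matrices. A minimal user-level sketch (the index sets here are hypothetical; it assumes A is
   an already assembled MPIAIJ matrix and omits error checking):

      IS       rows,cols;
      Mat      S;
      PetscInt rstart,rend,cstart,cend;
      MatGetOwnershipRange(A,&rstart,&rend);
      MatGetOwnershipRangeColumn(A,&cstart,&cend);
      ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&rows);
      ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&cols);
      MatCreateSubMatrix(A,rows,cols,MAT_INITIAL_MATRIX,&S);   // composes index sets/submatrices on S
      MatCreateSubMatrix(A,rows,cols,MAT_REUSE_MATRIX,&S);     // reuses them after the values of A change
      MatDestroy(&S);
      ISDestroy(&rows);
      ISDestroy(&cols);

   With MAT_REUSE_MATRIX, the objects composed on the submatrix by the initial call (e.g. "isrow_d",
   "iscol_d", "iscol_o", "SubMatrix", "SubIScol", "Subcmap" or "ISAllGather") are queried back and
   updated in place rather than rebuilt.
*/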
3449 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3450 { 3451 PetscErrorCode ierr; 3452 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3453 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3454 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3455 Mat M,Msub,B=a->B; 3456 MatScalar *aa; 3457 Mat_SeqAIJ *aij; 3458 PetscInt *garray = a->garray,*colsub,Ncols; 3459 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3460 IS iscol_sub,iscmap; 3461 const PetscInt *is_idx,*cmap; 3462 PetscBool allcolumns=PETSC_FALSE; 3463 MPI_Comm comm; 3464 3465 PetscFunctionBegin; 3466 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3467 3468 if (call == MAT_REUSE_MATRIX) { 3469 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3470 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3471 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3472 3473 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3474 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3475 3476 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3477 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3478 3479 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3480 3481 } else { /* call == MAT_INITIAL_MATRIX) */ 3482 PetscBool flg; 3483 3484 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3485 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3486 3487 /* (1) iscol -> nonscalable iscol_local */ 3488 /* Check for special case: each processor gets entire matrix columns */ 3489 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3490 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3491 if (allcolumns) { 3492 iscol_sub = iscol_local; 3493 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3494 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3495 3496 } else { 3497 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3498 PetscInt *idx,*cmap1,k; 3499 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3500 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3501 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3502 count = 0; 3503 k = 0; 3504 for (i=0; i<Ncols; i++) { 3505 j = is_idx[i]; 3506 if (j >= cstart && j < cend) { 3507 /* diagonal part of mat */ 3508 idx[count] = j; 3509 cmap1[count++] = i; /* column index in submat */ 3510 } else if (Bn) { 3511 /* off-diagonal part of mat */ 3512 if (j == garray[k]) { 3513 idx[count] = j; 3514 cmap1[count++] = i; /* column index in submat */ 3515 } else if (j > garray[k]) { 3516 while (j > garray[k] && k < Bn-1) k++; 3517 if (j == garray[k]) { 3518 idx[count] = j; 3519 cmap1[count++] = i; /* column index in submat */ 3520 } 3521 } 3522 } 3523 } 3524 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3525 3526 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3527 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3528 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3529 3530 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3531 } 3532 3533 /* (3) Create sequential Msub */ 3534 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3535 } 3536 3537 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3538 aij = (Mat_SeqAIJ*)(Msub)->data; 3539 ii = aij->i; 3540 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3541 3542 /* 3543 m - number of local rows 3544 Ncols - number of columns (same on all processors) 3545 rstart - first row in new global matrix generated 3546 */ 3547 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3548 3549 if (call == MAT_INITIAL_MATRIX) { 3550 /* (4) Create parallel newmat */ 3551 PetscMPIInt rank,size; 3552 PetscInt csize; 3553 3554 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3555 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3556 3557 /* 3558 Determine the number of non-zeros in the diagonal and off-diagonal 3559 portions of the matrix in order to do correct preallocation 3560 */ 3561 3562 /* first get start and end of "diagonal" columns */ 3563 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3564 if (csize == PETSC_DECIDE) { 3565 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3566 if (mglobal == Ncols) { /* square matrix */ 3567 nlocal = m; 3568 } else { 3569 nlocal = Ncols/size + ((Ncols % size) > rank); 3570 } 3571 } else { 3572 nlocal = csize; 3573 } 3574 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3575 rstart = rend - nlocal; 3576 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3577 3578 /* next, compute all the lengths */ 3579 jj = aij->j; 3580 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3581 olens = dlens + m; 3582 for (i=0; i<m; i++) { 3583 jend = ii[i+1] - ii[i]; 3584 olen = 0; 3585 dlen = 0; 3586 for (j=0; j<jend; j++) { 3587 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3588 else dlen++; 3589 jj++; 3590 } 3591 olens[i] = olen; 3592 dlens[i] = dlen; 3593 } 3594 3595 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3596 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3597 3598 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3599 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
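    /* M gets the row/column block sizes of isrow/iscol, mat's type, and is preallocated with the
       diagonal/off-diagonal row lengths (dlens/olens) computed above */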
3600 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3601 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3602 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3603 ierr = PetscFree(dlens);CHKERRQ(ierr); 3604 3605 } else { /* call == MAT_REUSE_MATRIX */ 3606 M = *newmat; 3607 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3608 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3609 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3610 /* 3611 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3612 rather than the slower MatSetValues(). 3613 */ 3614 M->was_assembled = PETSC_TRUE; 3615 M->assembled = PETSC_FALSE; 3616 } 3617 3618 /* (5) Set values of Msub to *newmat */ 3619 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3620 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3621 3622 jj = aij->j; 3623 aa = aij->a; 3624 for (i=0; i<m; i++) { 3625 row = rstart + i; 3626 nz = ii[i+1] - ii[i]; 3627 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3628 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3629 jj += nz; aa += nz; 3630 } 3631 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3632 3633 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3634 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3635 3636 ierr = PetscFree(colsub);CHKERRQ(ierr); 3637 3638 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3639 if (call == MAT_INITIAL_MATRIX) { 3640 *newmat = M; 3641 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3642 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3643 3644 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3645 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3646 3647 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3648 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3649 3650 if (iscol_local) { 3651 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3652 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3653 } 3654 } 3655 PetscFunctionReturn(0); 3656 } 3657 3658 /* 3659 Not great since it makes two copies of the submatrix, first an SeqAIJ 3660 in local and then by concatenating the local matrices the end result. 3661 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3662 3663 Note: This requires a sequential iscol with all indices. 
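   (Each process first extracts its rows into a sequential AIJ matrix with
   MatCreateSubMatrices_MPIAIJ_SingleIS_Local(), and those pieces are then re-inserted with
   MatSetValues_MPIAIJ() to build the parallel result, hence the extra copy.)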
3664 */ 3665 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3666 { 3667 PetscErrorCode ierr; 3668 PetscMPIInt rank,size; 3669 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3670 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3671 Mat M,Mreuse; 3672 MatScalar *aa,*vwork; 3673 MPI_Comm comm; 3674 Mat_SeqAIJ *aij; 3675 PetscBool colflag,allcolumns=PETSC_FALSE; 3676 3677 PetscFunctionBegin; 3678 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3679 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3680 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3681 3682 /* Check for special case: each processor gets entire matrix columns */ 3683 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3684 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3685 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3686 3687 if (call == MAT_REUSE_MATRIX) { 3688 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3689 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3690 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3691 } else { 3692 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3693 } 3694 3695 /* 3696 m - number of local rows 3697 n - number of columns (same on all processors) 3698 rstart - first row in new global matrix generated 3699 */ 3700 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3701 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3702 if (call == MAT_INITIAL_MATRIX) { 3703 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3704 ii = aij->i; 3705 jj = aij->j; 3706 3707 /* 3708 Determine the number of non-zeros in the diagonal and off-diagonal 3709 portions of the matrix in order to do correct preallocation 3710 */ 3711 3712 /* first get start and end of "diagonal" columns */ 3713 if (csize == PETSC_DECIDE) { 3714 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3715 if (mglobal == n) { /* square matrix */ 3716 nlocal = m; 3717 } else { 3718 nlocal = n/size + ((n % size) > rank); 3719 } 3720 } else { 3721 nlocal = csize; 3722 } 3723 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3724 rstart = rend - nlocal; 3725 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3726 3727 /* next, compute all the lengths */ 3728 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3729 olens = dlens + m; 3730 for (i=0; i<m; i++) { 3731 jend = ii[i+1] - ii[i]; 3732 olen = 0; 3733 dlen = 0; 3734 for (j=0; j<jend; j++) { 3735 if (*jj < rstart || *jj >= rend) olen++; 3736 else dlen++; 3737 jj++; 3738 } 3739 olens[i] = olen; 3740 dlens[i] = dlen; 3741 } 3742 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3743 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3744 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3745 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3746 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3747 ierr = PetscFree(dlens);CHKERRQ(ierr); 3748 } else { 3749 PetscInt ml,nl; 3750 3751 M = *newmat; 3752 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3753 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3754 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3755 /* 3756 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3757 rather than the slower MatSetValues(). 3758 */ 3759 M->was_assembled = PETSC_TRUE; 3760 M->assembled = PETSC_FALSE; 3761 } 3762 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3763 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3764 ii = aij->i; 3765 jj = aij->j; 3766 aa = aij->a; 3767 for (i=0; i<m; i++) { 3768 row = rstart + i; 3769 nz = ii[i+1] - ii[i]; 3770 cwork = jj; jj += nz; 3771 vwork = aa; aa += nz; 3772 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3773 } 3774 3775 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3776 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3777 *newmat = M; 3778 3779 /* save submatrix used in processor for next request */ 3780 if (call == MAT_INITIAL_MATRIX) { 3781 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3782 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3783 } 3784 PetscFunctionReturn(0); 3785 } 3786 3787 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3788 { 3789 PetscInt m,cstart, cend,j,nnz,i,d; 3790 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3791 const PetscInt *JJ; 3792 PetscScalar *values; 3793 PetscErrorCode ierr; 3794 PetscBool nooffprocentries; 3795 3796 PetscFunctionBegin; 3797 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3798 3799 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3800 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3801 m = B->rmap->n; 3802 cstart = B->cmap->rstart; 3803 cend = B->cmap->rend; 3804 rstart = B->rmap->rstart; 3805 3806 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3807 3808 #if defined(PETSC_USE_DEBUG) 3809 for (i=0; i<m; i++) { 3810 nnz = Ii[i+1]- Ii[i]; 3811 JJ = J + Ii[i]; 3812 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3813 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3814 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3815 } 3816 #endif 3817 3818 for (i=0; i<m; i++) { 3819 nnz = Ii[i+1]- Ii[i]; 3820 JJ = J + Ii[i]; 3821 nnz_max = PetscMax(nnz_max,nnz); 3822 d = 0; 3823 for (j=0; j<nnz; j++) { 3824 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3825 } 3826 d_nnz[i] = d; 3827 o_nnz[i] = nnz - d; 3828 } 3829 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3830 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3831 3832 if (v) values = (PetscScalar*)v; 3833 else { 3834 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3835 } 3836 3837 for (i=0; i<m; i++) { 3838 ii = i + rstart; 3839 nnz = Ii[i+1]- Ii[i]; 3840 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3841 } 3842 nooffprocentries = B->nooffprocentries; 3843 B->nooffprocentries = PETSC_TRUE; 3844 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3845 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3846 B->nooffprocentries = nooffprocentries; 3847 3848 if (!v) { 3849 ierr = PetscFree(values);CHKERRQ(ierr); 3850 } 3851 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3852 PetscFunctionReturn(0); 3853 } 3854 3855 /*@ 3856 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3857 (the default parallel PETSc format). 3858 3859 Collective on MPI_Comm 3860 3861 Input Parameters: 3862 + B - the matrix 3863 . i - the indices into j for the start of each local row (starts with zero) 3864 . j - the column indices for each local row (starts with zero) 3865 - v - optional values in the matrix 3866 3867 Level: developer 3868 3869 Notes: 3870 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3871 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3872 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3873 3874 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3875 3876 The format which is used for the sparse matrix input, is equivalent to a 3877 row-major ordering.. i.e for the following matrix, the input data expected is 3878 as shown 3879 3880 $ 1 0 0 3881 $ 2 0 3 P0 3882 $ ------- 3883 $ 4 5 6 P1 3884 $ 3885 $ Process0 [P0]: rows_owned=[0,1] 3886 $ i = {0,1,3} [size = nrow+1 = 2+1] 3887 $ j = {0,0,2} [size = 3] 3888 $ v = {1,2,3} [size = 3] 3889 $ 3890 $ Process1 [P1]: rows_owned=[2] 3891 $ i = {0,3} [size = nrow+1 = 1+1] 3892 $ j = {0,1,2} [size = 3] 3893 $ v = {4,5,6} [size = 3] 3894 3895 .keywords: matrix, aij, compressed row, sparse, parallel 3896 3897 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3898 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3899 @*/ 3900 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3901 { 3902 PetscErrorCode ierr; 3903 3904 PetscFunctionBegin; 3905 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3906 PetscFunctionReturn(0); 3907 } 3908 3909 /*@C 3910 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3911 (the default parallel PETSc format). For good matrix assembly performance 3912 the user should preallocate the matrix storage by setting the parameters 3913 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3914 performance can be increased by more than a factor of 50. 3915 3916 Collective on MPI_Comm 3917 3918 Input Parameters: 3919 + B - the matrix 3920 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3921 (same value is used for all local rows) 3922 . d_nnz - array containing the number of nonzeros in the various rows of the 3923 DIAGONAL portion of the local submatrix (possibly different for each row) 3924 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3925 The size of this array is equal to the number of local rows, i.e 'm'. 
3926 For matrices that will be factored, you must leave room for (and set) 3927 the diagonal entry even if it is zero. 3928 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3929 submatrix (same value is used for all local rows). 3930 - o_nnz - array containing the number of nonzeros in the various rows of the 3931 OFF-DIAGONAL portion of the local submatrix (possibly different for 3932 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3933 structure. The size of this array is equal to the number 3934 of local rows, i.e 'm'. 3935 3936 If the *_nnz parameter is given then the *_nz parameter is ignored 3937 3938 The AIJ format (also called the Yale sparse matrix format or 3939 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3940 storage. The stored row and column indices begin with zero. 3941 See Users-Manual: ch_mat for details. 3942 3943 The parallel matrix is partitioned such that the first m0 rows belong to 3944 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3945 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3946 3947 The DIAGONAL portion of the local submatrix of a processor can be defined 3948 as the submatrix which is obtained by extraction the part corresponding to 3949 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3950 first row that belongs to the processor, r2 is the last row belonging to 3951 the this processor, and c1-c2 is range of indices of the local part of a 3952 vector suitable for applying the matrix to. This is an mxn matrix. In the 3953 common case of a square matrix, the row and column ranges are the same and 3954 the DIAGONAL part is also square. The remaining portion of the local 3955 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3956 3957 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3958 3959 You can call MatGetInfo() to get information on how effective the preallocation was; 3960 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3961 You can also run with the option -info and look for messages with the string 3962 malloc in them to see if additional memory allocation was needed. 3963 3964 Example usage: 3965 3966 Consider the following 8x8 matrix with 34 non-zero values, that is 3967 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3968 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3969 as follows: 3970 3971 .vb 3972 1 2 0 | 0 3 0 | 0 4 3973 Proc0 0 5 6 | 7 0 0 | 8 0 3974 9 0 10 | 11 0 0 | 12 0 3975 ------------------------------------- 3976 13 0 14 | 15 16 17 | 0 0 3977 Proc1 0 18 0 | 19 20 21 | 0 0 3978 0 0 0 | 22 23 0 | 24 0 3979 ------------------------------------- 3980 Proc2 25 26 27 | 0 0 28 | 29 0 3981 30 0 0 | 31 32 33 | 0 34 3982 .ve 3983 3984 This can be represented as a collection of submatrices as: 3985 3986 .vb 3987 A B C 3988 D E F 3989 G H I 3990 .ve 3991 3992 Where the submatrices A,B,C are owned by proc0, D,E,F are 3993 owned by proc1, G,H,I are owned by proc2. 3994 3995 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3996 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3997 The 'M','N' parameters are 8,8, and have the same values on all procs. 3998 3999 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4000 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4001 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
4002 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4003 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4004 matrix, ans [DF] as another SeqAIJ matrix. 4005 4006 When d_nz, o_nz parameters are specified, d_nz storage elements are 4007 allocated for every row of the local diagonal submatrix, and o_nz 4008 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4009 One way to choose d_nz and o_nz is to use the max nonzerors per local 4010 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4011 In this case, the values of d_nz,o_nz are: 4012 .vb 4013 proc0 : dnz = 2, o_nz = 2 4014 proc1 : dnz = 3, o_nz = 2 4015 proc2 : dnz = 1, o_nz = 4 4016 .ve 4017 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4018 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4019 for proc3. i.e we are using 12+15+10=37 storage locations to store 4020 34 values. 4021 4022 When d_nnz, o_nnz parameters are specified, the storage is specified 4023 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4024 In the above case the values for d_nnz,o_nnz are: 4025 .vb 4026 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4027 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4028 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4029 .ve 4030 Here the space allocated is sum of all the above values i.e 34, and 4031 hence pre-allocation is perfect. 4032 4033 Level: intermediate 4034 4035 .keywords: matrix, aij, compressed row, sparse, parallel 4036 4037 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4038 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4039 @*/ 4040 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4041 { 4042 PetscErrorCode ierr; 4043 4044 PetscFunctionBegin; 4045 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4046 PetscValidType(B,1); 4047 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4048 PetscFunctionReturn(0); 4049 } 4050 4051 /*@ 4052 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4053 CSR format the local rows. 4054 4055 Collective on MPI_Comm 4056 4057 Input Parameters: 4058 + comm - MPI communicator 4059 . m - number of local rows (Cannot be PETSC_DECIDE) 4060 . n - This value should be the same as the local size used in creating the 4061 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4062 calculated if N is given) For square matrices n is almost always m. 4063 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4064 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4065 . i - row indices 4066 . j - column indices 4067 - a - matrix values 4068 4069 Output Parameter: 4070 . mat - the matrix 4071 4072 Level: intermediate 4073 4074 Notes: 4075 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4076 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4077 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4078 4079 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
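   For the two-process layout shown below, a call might look like the following sketch (each process
   passes only its own local row count m and its local i, j, v arrays; error checking is omitted):

$     Mat A;
$     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
$     MatDestroy(&A);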
4080 4081 The format which is used for the sparse matrix input, is equivalent to a 4082 row-major ordering.. i.e for the following matrix, the input data expected is 4083 as shown 4084 4085 $ 1 0 0 4086 $ 2 0 3 P0 4087 $ ------- 4088 $ 4 5 6 P1 4089 $ 4090 $ Process0 [P0]: rows_owned=[0,1] 4091 $ i = {0,1,3} [size = nrow+1 = 2+1] 4092 $ j = {0,0,2} [size = 3] 4093 $ v = {1,2,3} [size = 3] 4094 $ 4095 $ Process1 [P1]: rows_owned=[2] 4096 $ i = {0,3} [size = nrow+1 = 1+1] 4097 $ j = {0,1,2} [size = 3] 4098 $ v = {4,5,6} [size = 3] 4099 4100 .keywords: matrix, aij, compressed row, sparse, parallel 4101 4102 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4103 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4104 @*/ 4105 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4106 { 4107 PetscErrorCode ierr; 4108 4109 PetscFunctionBegin; 4110 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4111 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4112 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4113 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4114 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4115 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4116 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4117 PetscFunctionReturn(0); 4118 } 4119 4120 /*@C 4121 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4122 (the default parallel PETSc format). For good matrix assembly performance 4123 the user should preallocate the matrix storage by setting the parameters 4124 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4125 performance can be increased by more than a factor of 50. 4126 4127 Collective on MPI_Comm 4128 4129 Input Parameters: 4130 + comm - MPI communicator 4131 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4132 This value should be the same as the local size used in creating the 4133 y vector for the matrix-vector product y = Ax. 4134 . n - This value should be the same as the local size used in creating the 4135 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4136 calculated if N is given) For square matrices n is almost always m. 4137 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4138 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4139 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4140 (same value is used for all local rows) 4141 . d_nnz - array containing the number of nonzeros in the various rows of the 4142 DIAGONAL portion of the local submatrix (possibly different for each row) 4143 or NULL, if d_nz is used to specify the nonzero structure. 4144 The size of this array is equal to the number of local rows, i.e 'm'. 4145 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4146 submatrix (same value is used for all local rows). 4147 - o_nnz - array containing the number of nonzeros in the various rows of the 4148 OFF-DIAGONAL portion of the local submatrix (possibly different for 4149 each row) or NULL, if o_nz is used to specify the nonzero 4150 structure. 
The size of this array is equal to the number 4151 of local rows, i.e 'm'. 4152 4153 Output Parameter: 4154 . A - the matrix 4155 4156 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4157 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4158 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4159 4160 Notes: 4161 If the *_nnz parameter is given then the *_nz parameter is ignored 4162 4163 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4164 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4165 storage requirements for this matrix. 4166 4167 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4168 processor than it must be used on all processors that share the object for 4169 that argument. 4170 4171 The user MUST specify either the local or global matrix dimensions 4172 (possibly both). 4173 4174 The parallel matrix is partitioned across processors such that the 4175 first m0 rows belong to process 0, the next m1 rows belong to 4176 process 1, the next m2 rows belong to process 2 etc.. where 4177 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4178 values corresponding to [m x N] submatrix. 4179 4180 The columns are logically partitioned with the n0 columns belonging 4181 to 0th partition, the next n1 columns belonging to the next 4182 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4183 4184 The DIAGONAL portion of the local submatrix on any given processor 4185 is the submatrix corresponding to the rows and columns m,n 4186 corresponding to the given processor. i.e diagonal matrix on 4187 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4188 etc. The remaining portion of the local submatrix [m x (N-n)] 4189 constitute the OFF-DIAGONAL portion. The example below better 4190 illustrates this concept. 4191 4192 For a square global matrix we define each processor's diagonal portion 4193 to be its local rows and the corresponding columns (a square submatrix); 4194 each processor's off-diagonal portion encompasses the remainder of the 4195 local matrix (a rectangular submatrix). 4196 4197 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4198 4199 When calling this routine with a single process communicator, a matrix of 4200 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4201 type of communicator, use the construction mechanism 4202 .vb 4203 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4204 .ve 4205 4206 $ MatCreate(...,&A); 4207 $ MatSetType(A,MATMPIAIJ); 4208 $ MatSetSizes(A, m,n,M,N); 4209 $ MatMPIAIJSetPreallocation(A,...); 4210 4211 By default, this format uses inodes (identical nodes) when possible. 4212 We search for consecutive rows with the same nonzero structure, thereby 4213 reusing matrix information to achieve increased efficiency. 4214 4215 Options Database Keys: 4216 + -mat_no_inode - Do not use inodes 4217 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4218 4219 4220 4221 Example usage: 4222 4223 Consider the following 8x8 matrix with 34 non-zero values, that is 4224 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4225 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4226    as follows 4227 4228 .vb
4229            1  2  0  |  0  3  0  |  0  4
4230     Proc0   0  5  6  |  7  0  0  |  8  0
4231            9  0 10  | 11  0  0  | 12  0
4232    -------------------------------------
4233           13  0 14  | 15 16 17  |  0  0
4234     Proc1   0 18  0  | 19 20 21  |  0  0
4235            0  0  0  | 22 23  0  | 24  0
4236    -------------------------------------
4237     Proc2  25 26 27  |  0  0 28  | 29  0
4238           30  0  0  | 31 32 33  |  0 34
4239 .ve 4240 4241    This can be represented as a collection of submatrices as 4242 4243 .vb
4244       A B C
4245       D E F
4246       G H I
4247 .ve 4248
4249    Where the submatrices A,B,C are owned by proc0, D,E,F are 4250    owned by proc1, G,H,I are owned by proc2. 4251
4252    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4253    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4254    The 'M','N' parameters are 8,8, and have the same values on all procs. 4255
4256    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4257    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4258    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4259    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4260    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4261    matrix, and [DF] as another SeqAIJ matrix. 4262
4263    When d_nz, o_nz parameters are specified, d_nz storage elements are 4264    allocated for every row of the local diagonal submatrix, and o_nz 4265    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4266    One way to choose d_nz and o_nz is to use the max nonzeros per local 4267    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4268    In this case, the values of d_nz,o_nz are 4269 .vb
4270      proc0 : d_nz = 2, o_nz = 2
4271      proc1 : d_nz = 3, o_nz = 2
4272      proc2 : d_nz = 1, o_nz = 4
4273 .ve
4274    We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4275    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4276    for proc2, i.e. we are using 12+15+10=37 storage locations to store 4277    34 values. 4278
4279    When d_nnz, o_nnz parameters are specified, the storage is specified 4280    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4281    In the above case the values for d_nnz,o_nnz are 4282 .vb
4283      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4284      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4285      proc2: d_nnz = [1,1] and o_nnz = [4,4]
4286 .ve
4287    Here the space allocated is the sum of all the above values, i.e. 34, and 4288    hence preallocation is perfect.
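   A construction sketch matching the layout above (the values shown are those of proc1; each process
   passes its own m, n, d_nnz, o_nnz, then sets its locally owned rows with MatSetValues() and
   assembles; error checking is omitted):

.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     MatSetValues(A,...);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve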
4289 4290 Level: intermediate 4291 4292 .keywords: matrix, aij, compressed row, sparse, parallel 4293 4294 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4295 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4296 @*/ 4297 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4298 { 4299 PetscErrorCode ierr; 4300 PetscMPIInt size; 4301 4302 PetscFunctionBegin; 4303 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4304 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4305 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4306 if (size > 1) { 4307 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4308 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4309 } else { 4310 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4311 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4312 } 4313 PetscFunctionReturn(0); 4314 } 4315 4316 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4317 { 4318 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4319 PetscBool flg; 4320 PetscErrorCode ierr; 4321 4322 PetscFunctionBegin; 4323 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4324 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4325 if (Ad) *Ad = a->A; 4326 if (Ao) *Ao = a->B; 4327 if (colmap) *colmap = a->garray; 4328 PetscFunctionReturn(0); 4329 } 4330 4331 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4332 { 4333 PetscErrorCode ierr; 4334 PetscInt m,N,i,rstart,nnz,Ii; 4335 PetscInt *indx; 4336 PetscScalar *values; 4337 4338 PetscFunctionBegin; 4339 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4340 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4341 PetscInt *dnz,*onz,sum,bs,cbs; 4342 4343 if (n == PETSC_DECIDE) { 4344 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4345 } 4346 /* Check sum(n) = N */ 4347 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4348 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4349 4350 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4351 rstart -= m; 4352 4353 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4354 for (i=0; i<m; i++) { 4355 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4356 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4357 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4358 } 4359 4360 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4361 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4362 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4363 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4364 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4365 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4366 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4367 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4368 } 4369 4370 /* numeric phase */ 4371 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4372 for (i=0; i<m; i++) { 4373 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4374 Ii = i + rstart; 4375 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4376 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4377 } 4378 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4379 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4380 PetscFunctionReturn(0); 4381 } 4382 4383 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4384 { 4385 PetscErrorCode ierr; 4386 PetscMPIInt rank; 4387 PetscInt m,N,i,rstart,nnz; 4388 size_t len; 4389 const PetscInt *indx; 4390 PetscViewer out; 4391 char *name; 4392 Mat B; 4393 const PetscScalar *values; 4394 4395 PetscFunctionBegin; 4396 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4397 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4398 /* Should this be the type of the diagonal block of A? */ 4399 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4400 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4401 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4402 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4403 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4404 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4405 for (i=0; i<m; i++) { 4406 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4407 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4408 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4409 } 4410 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4411 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4412 4413 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4414 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4415 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4416 sprintf(name,"%s.%d",outfile,rank); 4417 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4418 ierr = PetscFree(name);CHKERRQ(ierr); 4419 ierr = MatView(B,out);CHKERRQ(ierr); 4420 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4421 ierr = MatDestroy(&B);CHKERRQ(ierr); 4422 PetscFunctionReturn(0); 4423 } 4424 4425 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4426 { 4427 PetscErrorCode ierr; 4428 Mat_Merge_SeqsToMPI *merge; 4429 PetscContainer container; 4430 4431 PetscFunctionBegin; 4432 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4433 if (container) { 4434 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4435 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4436 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4437 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4438 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4439 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4440 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4441 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4442 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4443 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4444 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4445 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4446 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4447 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4448 ierr = PetscFree(merge);CHKERRQ(ierr); 4449 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4450 } 4451 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4452 PetscFunctionReturn(0); 4453 } 4454 4455 #include <../src/mat/utils/freespace.h> 4456 #include <petscbt.h> 4457 4458 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4459 { 4460 PetscErrorCode ierr; 4461 MPI_Comm comm; 4462 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4463 PetscMPIInt 
size,rank,taga,*len_s; 4464 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4465 PetscInt proc,m; 4466 PetscInt **buf_ri,**buf_rj; 4467 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4468 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4469 MPI_Request *s_waits,*r_waits; 4470 MPI_Status *status; 4471 MatScalar *aa=a->a; 4472 MatScalar **abuf_r,*ba_i; 4473 Mat_Merge_SeqsToMPI *merge; 4474 PetscContainer container; 4475 4476 PetscFunctionBegin; 4477 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4478 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4479 4480 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4481 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4482 4483 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4484 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4485 4486 bi = merge->bi; 4487 bj = merge->bj; 4488 buf_ri = merge->buf_ri; 4489 buf_rj = merge->buf_rj; 4490 4491 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4492 owners = merge->rowmap->range; 4493 len_s = merge->len_s; 4494 4495 /* send and recv matrix values */ 4496 /*-----------------------------*/ 4497 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4498 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4499 4500 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4501 for (proc=0,k=0; proc<size; proc++) { 4502 if (!len_s[proc]) continue; 4503 i = owners[proc]; 4504 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4505 k++; 4506 } 4507 4508 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4509 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4510 ierr = PetscFree(status);CHKERRQ(ierr); 4511 4512 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4513 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4514 4515 /* insert mat values of mpimat */ 4516 /*----------------------------*/ 4517 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4518 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4519 4520 for (k=0; k<merge->nrecv; k++) { 4521 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4522 nrows = *(buf_ri_k[k]); 4523 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4524 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4525 } 4526 4527 /* set values of ba */ 4528 m = merge->rowmap->n; 4529 for (i=0; i<m; i++) { 4530 arow = owners[rank] + i; 4531 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4532 bnzi = bi[i+1] - bi[i]; 4533 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4534 4535 /* add local non-zero vals of this proc's seqmat into ba */ 4536 anzi = ai[arow+1] - ai[arow]; 4537 aj = a->j + ai[arow]; 4538 aa = a->a + ai[arow]; 4539 nextaj = 0; 4540 for (j=0; nextaj<anzi; j++) { 4541 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4542 ba_i[j] += aa[nextaj++]; 4543 } 4544 } 4545 4546 /* add received vals into ba */ 4547 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4548 /* i-th row */ 4549 if (i == *nextrow[k]) { 4550 anzi = *(nextai[k]+1) - *nextai[k]; 4551 aj = buf_rj[k] + *(nextai[k]); 4552 aa = abuf_r[k] + *(nextai[k]); 4553 nextaj = 0; 4554 for (j=0; nextaj<anzi; j++) { 4555 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4556 ba_i[j] += aa[nextaj++]; 4557 } 4558 } 4559 nextrow[k]++; nextai[k]++; 4560 } 4561 } 4562 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4563 } 4564 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4565 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4566 4567 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4568 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4569 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4570 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4571 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4572 PetscFunctionReturn(0); 4573 } 4574 4575 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4576 { 4577 PetscErrorCode ierr; 4578 Mat B_mpi; 4579 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4580 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4581 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4582 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4583 PetscInt len,proc,*dnz,*onz,bs,cbs; 4584 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4585 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4586 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4587 MPI_Status *status; 4588 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4589 PetscBT lnkbt; 4590 Mat_Merge_SeqsToMPI *merge; 4591 PetscContainer container; 4592 4593 PetscFunctionBegin; 4594 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4595 4596 /* make sure it is a PETSc comm */ 4597 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4598 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4599 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4600 4601 ierr = PetscNew(&merge);CHKERRQ(ierr); 4602 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4603 4604 /* determine row ownership */ 4605 /*---------------------------------------------------------*/ 4606 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4607 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4608 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4609 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4610 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4611 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4612 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4613 4614 m = merge->rowmap->n; 4615 owners = merge->rowmap->range; 4616 4617 /* determine the number of messages to send, their lengths */ 4618 /*---------------------------------------------------------*/ 4619 len_s = merge->len_s; 4620 4621 len = 0; /* length of buf_si[] */ 4622 merge->nsend = 0; 4623 for (proc=0; proc<size; proc++) { 4624 len_si[proc] = 0; 4625 if (proc == rank) { 4626 len_s[proc] = 0; 4627 } else { 4628 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4629 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4630 } 4631 if (len_s[proc]) { 4632 merge->nsend++; 4633 nrows = 0; 4634 for (i=owners[proc]; i<owners[proc+1]; i++) { 4635 if (ai[i+1] > ai[i]) nrows++; 4636 } 4637 len_si[proc] = 2*(nrows+1); 4638 len += len_si[proc]; 4639 } 4640 } 4641 4642 /* determine the number and length of messages to receive for ij-structure */ 4643 /*-------------------------------------------------------------------------*/ 4644 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4645 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4646 4647 /* post the Irecv of j-structure */ 4648 /*-------------------------------*/ 4649 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4650 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4651 4652 /* post the Isend of j-structure */ 4653 /*--------------------------------*/ 4654 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4655 4656 for (proc=0, k=0; proc<size; proc++) { 4657 if (!len_s[proc]) continue; 4658 i = owners[proc]; 4659 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4660 k++; 4661 } 4662 4663 /* receives and sends of j-structure are complete */ 4664 /*------------------------------------------------*/ 4665 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4666 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4667 4668 /* send and recv i-structure */ 4669 /*---------------------------*/ 4670 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4671 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4672 4673 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4674 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4675 for (proc=0,k=0; proc<size; proc++) { 4676 if (!len_s[proc]) continue; 4677 /* form outgoing message for i-structure: 4678 buf_si[0]: nrows to be sent 4679 [1:nrows]: row index (global) 4680 [nrows+1:2*nrows+1]: i-structure index 4681 */ 4682 /*-------------------------------------------*/ 4683 nrows = len_si[proc]/2 - 1; 4684 buf_si_i = buf_si + nrows+1; 4685 buf_si[0] = nrows; 4686 buf_si_i[0] = 0; 4687 nrows = 0; 4688 for (i=owners[proc]; i<owners[proc+1]; i++) { 4689 anzi = ai[i+1] - ai[i]; 4690 if (anzi) { 4691 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4692 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4693 nrows++; 4694 } 4695 } 4696 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4697 k++; 4698 buf_si += len_si[proc]; 4699 } 4700 4701 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4702 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4703 4704 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4705 for (i=0; i<merge->nrecv; i++) { 4706 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4707 } 4708 4709 ierr = PetscFree(len_si);CHKERRQ(ierr); 4710 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4711 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4712 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4713 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4714 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4715 ierr = PetscFree(status);CHKERRQ(ierr); 4716 4717 /* compute a local seq matrix in each processor */ 4718 /*----------------------------------------------*/ 4719 /* allocate bi array and free space for accumulating nonzero column info */ 4720 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4721 bi[0] = 0; 4722 4723 /* create and initialize a linked list */ 4724 nlnk = N+1; 4725 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4726 4727 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4728 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4729 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4730 4731 current_space = free_space; 4732 4733 /* determine symbolic info for each local row */ 4734 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4735 4736 for (k=0; k<merge->nrecv; k++) { 4737 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4738 nrows = *buf_ri_k[k]; 4739 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4740 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4741 } 4742 4743 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4744 len = 0; 4745 for (i=0; i<m; i++) { 4746 bnzi = 0; 4747 /* add local non-zero cols of this proc's seqmat into lnk */ 4748 arow = owners[rank] + i; 4749 anzi = ai[arow+1] - ai[arow]; 4750 aj = a->j + ai[arow]; 4751 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4752 bnzi += nlnk; 4753 /* add received col data into lnk */ 4754 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4755 if (i == *nextrow[k]) { /* i-th row */ 4756 anzi = *(nextai[k]+1) - *nextai[k]; 4757 aj = buf_rj[k] + *nextai[k]; 4758 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4759 bnzi += nlnk; 4760 nextrow[k]++; nextai[k]++; 4761 } 4762 } 4763 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4764 4765 /* if free space is not available, make more free space */ 4766 if (current_space->local_remaining<bnzi) { 4767 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4768 nspacedouble++; 4769 } 4770 /* copy data into free space, then initialize lnk */ 4771 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4772 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4773 4774 current_space->array += bnzi; 4775 current_space->local_used += bnzi; 4776 current_space->local_remaining -= bnzi; 4777 4778 bi[i+1] = bi[i] + bnzi; 4779 } 4780 4781 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4782 4783 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4784 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4785 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4786 4787 /* create symbolic parallel matrix B_mpi */ 4788 /*---------------------------------------*/ 4789 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4790 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4791 if (n==PETSC_DECIDE) { 4792 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4793 } else { 4794 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4795 } 4796 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4797 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4798 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4799 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4800 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4801 4802 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4803 B_mpi->assembled = PETSC_FALSE; 4804 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4805 merge->bi = bi; 4806 merge->bj = bj; 4807 merge->buf_ri = buf_ri; 4808 merge->buf_rj = buf_rj; 4809 merge->coi = NULL; 4810 merge->coj = NULL; 4811 merge->owners_co = NULL; 4812 4813 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4814 4815 /* attach the
supporting struct to B_mpi for reuse */ 4816 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4817 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4818 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4819 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4820 *mpimat = B_mpi; 4821 4822 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4823 PetscFunctionReturn(0); 4824 } 4825 4826 /*@C 4827 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4828 matrices from each processor 4829 4830 Collective on MPI_Comm 4831 4832 Input Parameters: 4833 + comm - the communicator the parallel matrix will live on 4834 . seqmat - the input sequential matrix 4835 . m - number of local rows (or PETSC_DECIDE) 4836 . n - number of local columns (or PETSC_DECIDE) 4837 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4838 4839 Output Parameter: 4840 . mpimat - the parallel matrix generated 4841 4842 Level: advanced 4843 4844 Notes: 4845 The dimensions of the sequential matrix in each processor MUST be the same. 4846 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4847 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4848 @*/ 4849 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4850 { 4851 PetscErrorCode ierr; 4852 PetscMPIInt size; 4853 4854 PetscFunctionBegin; 4855 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4856 if (size == 1) { 4857 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4858 if (scall == MAT_INITIAL_MATRIX) { 4859 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4860 } else { 4861 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4862 } 4863 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4864 PetscFunctionReturn(0); 4865 } 4866 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4867 if (scall == MAT_INITIAL_MATRIX) { 4868 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4869 } 4870 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4871 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4872 PetscFunctionReturn(0); 4873 } 4874 4875 /*@ 4876 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4877 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4878 with MatGetSize(). 4879 4880 Not Collective 4881 4882 Input Parameters: 4883 + A - the matrix 4884 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4885 4886 Output Parameter: 4887 .
A_loc - the local sequential matrix generated 4888 4889 Level: developer 4890 4891 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4892 4893 @*/ 4894 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4895 { 4896 PetscErrorCode ierr; 4897 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4898 Mat_SeqAIJ *mat,*a,*b; 4899 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4900 MatScalar *aa,*ba,*cam; 4901 PetscScalar *ca; 4902 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4903 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4904 PetscBool match; 4905 MPI_Comm comm; 4906 PetscMPIInt size; 4907 4908 PetscFunctionBegin; 4909 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4910 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4911 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4912 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4913 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4914 4915 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4916 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4917 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4918 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4919 aa = a->a; ba = b->a; 4920 if (scall == MAT_INITIAL_MATRIX) { 4921 if (size == 1) { 4922 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4923 PetscFunctionReturn(0); 4924 } 4925 4926 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4927 ci[0] = 0; 4928 for (i=0; i<am; i++) { 4929 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4930 } 4931 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4932 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4933 k = 0; 4934 for (i=0; i<am; i++) { 4935 ncols_o = bi[i+1] - bi[i]; 4936 ncols_d = ai[i+1] - ai[i]; 4937 /* off-diagonal portion of A */ 4938 for (jo=0; jo<ncols_o; jo++) { 4939 col = cmap[*bj]; 4940 if (col >= cstart) break; 4941 cj[k] = col; bj++; 4942 ca[k++] = *ba++; 4943 } 4944 /* diagonal portion of A */ 4945 for (j=0; j<ncols_d; j++) { 4946 cj[k] = cstart + *aj++; 4947 ca[k++] = *aa++; 4948 } 4949 /* off-diagonal portion of A */ 4950 for (j=jo; j<ncols_o; j++) { 4951 cj[k] = cmap[*bj++]; 4952 ca[k++] = *ba++; 4953 } 4954 } 4955 /* put together the new matrix */ 4956 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4957 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4958 /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 4959 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4960 mat->free_a = PETSC_TRUE; 4961 mat->free_ij = PETSC_TRUE; 4962 mat->nonew = 0; 4963 } else if (scall == MAT_REUSE_MATRIX) { 4964 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4965 ci = mat->i; cj = mat->j; cam = mat->a; 4966 for (i=0; i<am; i++) { 4967 /* off-diagonal portion of A */ 4968 ncols_o = bi[i+1] - bi[i]; 4969 for (jo=0; jo<ncols_o; jo++) { 4970 col = cmap[*bj]; 4971 if (col >= cstart) break; 4972 *cam++ = *ba++; bj++; 4973 } 4974 /* diagonal portion of A */ 4975 ncols_d = ai[i+1] - ai[i]; 4976 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4977 /* off-diagonal portion of A */ 4978 for (j=jo; j<ncols_o; j++) { 4979 *cam++ = *ba++; bj++; 4980 } 4981 } 4982 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4983 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4984 PetscFunctionReturn(0); 4985 } 4986 4987 /*@C 4988 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4989 4990 Not Collective 4991 4992 Input Parameters: 4993 + A - the matrix 4994 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4995 - row, col - index sets of rows and columns to extract (or NULL) 4996 4997 Output Parameter: 4998 . A_loc - the local sequential matrix generated 4999 5000 Level: developer 5001 5002 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5003 5004 @*/ 5005 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5006 { 5007 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5008 PetscErrorCode ierr; 5009 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5010 IS isrowa,iscola; 5011 Mat *aloc; 5012 PetscBool match; 5013 5014 PetscFunctionBegin; 5015 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5016 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5017 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5018 if (!row) { 5019 start = A->rmap->rstart; end = A->rmap->rend; 5020 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5021 } else { 5022 isrowa = *row; 5023 } 5024 if (!col) { 5025 start = A->cmap->rstart; 5026 cmap = a->garray; 5027 nzA = a->A->cmap->n; 5028 nzB = a->B->cmap->n; 5029 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5030 ncols = 0; 5031 for (i=0; i<nzB; i++) { 5032 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5033 else break; 5034 } 5035 imark = i; 5036 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5037 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5038 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5039 } else { 5040 iscola = *col; 5041 } 5042 if (scall != MAT_INITIAL_MATRIX) { 5043 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5044 aloc[0] = *A_loc; 5045 } 5046 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5047 *A_loc = aloc[0]; 5048 ierr = PetscFree(aloc);CHKERRQ(ierr); 5049 if (!row) { 5050 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5051 } 5052 if (!col) { 5053 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5054 } 5055 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5056 PetscFunctionReturn(0); 5057 } 5058 5059 /*@C 5060 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B whose indices correspond to the nonzero columns of local A 5061 5062 Collective on Mat 5063 5064 Input Parameters: 5065 + A,B - the matrices in mpiaij format 5066 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5067 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5068 5069 Output Parameter: 5070 + rowb, colb - index sets of rows and columns of B to extract 5071 - B_seq - the sequential matrix generated 5072 5073 Level: developer 5074 5075 @*/ 5076 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5077 { 5078 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5079 PetscErrorCode ierr; 5080 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5081 IS isrowb,iscolb; 5082 Mat *bseq=NULL; 5083 5084 PetscFunctionBegin; 5085 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5086 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5087 } 5088 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5089 5090 if (scall == MAT_INITIAL_MATRIX) { 5091 start = A->cmap->rstart; 5092 cmap = a->garray; 5093 nzA = a->A->cmap->n; 5094 nzB = a->B->cmap->n; 5095 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5096 ncols = 0; 5097 for (i=0; i<nzB; i++) { /* row < local row index */ 5098 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5099 else break; 5100 } 5101 imark = i; 5102 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5103 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5104 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5105 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5106 } else { 5107 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5108 isrowb = *rowb; iscolb = *colb; 5109 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5110 bseq[0] = *B_seq; 5111 } 5112 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5113 *B_seq = bseq[0]; 5114 ierr = PetscFree(bseq);CHKERRQ(ierr); 5115 if (!rowb) { 5116 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5117 } else { 5118 *rowb = isrowb; 5119 } 5120 if (!colb) { 5121 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5122 } else { 5123 *colb = iscolb; 5124 } 5125 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5126 PetscFunctionReturn(0); 5127 } 5128 5129 /* 5130 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5131 of the OFF-DIAGONAL portion of local A 5132 5133 Collective on Mat 5134 5135 Input Parameters: 5136 + A,B - the matrices in mpiaij format 5137 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5138 5139 Output Parameter: 5140 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5141 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5142 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5143 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5144 5145 Level: developer 5146 5147 */ 5148 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5149 { 5150 VecScatter_MPI_General *gen_to,*gen_from; 5151 PetscErrorCode ierr; 5152 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5153 Mat_SeqAIJ *b_oth; 5154 VecScatter ctx; 5155 MPI_Comm comm; 5156 PetscMPIInt *rprocs,*sprocs,tag,rank; 5157 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5158 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5159 PetscScalar *b_otha,*bufa,*bufA,*vals; 5160 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5161 MPI_Request *rwaits = NULL,*swaits = NULL; 5162 MPI_Status *sstatus,rstatus; 5163 PetscMPIInt jj,size; 5164 VecScatterType type; 5165 PetscBool mpi1; 5166 5167 PetscFunctionBegin; 5168 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5169 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5170 5171 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5172 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5173 } 5174 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5175 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5176 5177 if (size == 1) { 5178 startsj_s = NULL; 5179 bufa_ptr = NULL; 5180 *B_oth = NULL; 5181 PetscFunctionReturn(0); 5182 } 5183 5184 ctx = a->Mvctx; 5185 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5186 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5187 if (!mpi1) { 5188 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5189 thus create a->Mvctx_mpi1 */ 5190 if (!a->Mvctx_mpi1) { 5191 a->Mvctx_mpi1_flg = PETSC_TRUE; 5192 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5193 } 5194 ctx = a->Mvctx_mpi1; 5195 } 5196 tag = ((PetscObject)ctx)->tag; 5197 5198 gen_to = (VecScatter_MPI_General*)ctx->todata; 5199 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5200 nrecvs = gen_from->n; 5201 nsends = gen_to->n; 5202 5203 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5204 srow = gen_to->indices; /* local row index to be sent */ 5205 sstarts = gen_to->starts; 5206 sprocs = gen_to->procs; 5207 sstatus = gen_to->sstatus; 5208 sbs = gen_to->bs; 5209 rstarts = gen_from->starts; 5210 rprocs = gen_from->procs; 5211 rbs = gen_from->bs; 5212 5213 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5214 if (scall == MAT_INITIAL_MATRIX) { 5215 /* i-array */ 5216 /*---------*/ 5217 /* post receives */ 5218 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5219 for (i=0; i<nrecvs; i++) { 5220 rowlen = rvalues + rstarts[i]*rbs; 5221 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5222 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5223 } 5224 5225 /* pack the outgoing message */ 5226 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5227 5228 sstartsj[0] = 0; 5229 rstartsj[0] = 0; 5230 len = 0; /* total length of j or a array to be sent */ 5231 k = 0; 5232 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5233 for (i=0; i<nsends; i++) { 5234 rowlen = svalues + sstarts[i]*sbs; 5235 
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5236 for (j=0; j<nrows; j++) { 5237 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5238 for (l=0; l<sbs; l++) { 5239 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5240 5241 rowlen[j*sbs+l] = ncols; 5242 5243 len += ncols; 5244 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5245 } 5246 k++; 5247 } 5248 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5249 5250 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5251 } 5252 /* recvs and sends of i-array are completed */ 5253 i = nrecvs; 5254 while (i--) { 5255 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5256 } 5257 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5258 ierr = PetscFree(svalues);CHKERRQ(ierr); 5259 5260 /* allocate buffers for sending j and a arrays */ 5261 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5262 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5263 5264 /* create i-array of B_oth */ 5265 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5266 5267 b_othi[0] = 0; 5268 len = 0; /* total length of j or a array to be received */ 5269 k = 0; 5270 for (i=0; i<nrecvs; i++) { 5271 rowlen = rvalues + rstarts[i]*rbs; 5272 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5273 for (j=0; j<nrows; j++) { 5274 b_othi[k+1] = b_othi[k] + rowlen[j]; 5275 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5276 k++; 5277 } 5278 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5279 } 5280 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5281 5282 /* allocate space for j and a arrrays of B_oth */ 5283 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5284 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5285 5286 /* j-array */ 5287 /*---------*/ 5288 /* post receives of j-array */ 5289 for (i=0; i<nrecvs; i++) { 5290 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5291 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5292 } 5293 5294 /* pack the outgoing message j-array */ 5295 k = 0; 5296 for (i=0; i<nsends; i++) { 5297 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5298 bufJ = bufj+sstartsj[i]; 5299 for (j=0; j<nrows; j++) { 5300 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5301 for (ll=0; ll<sbs; ll++) { 5302 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5303 for (l=0; l<ncols; l++) { 5304 *bufJ++ = cols[l]; 5305 } 5306 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5307 } 5308 } 5309 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5310 } 5311 5312 /* recvs and sends of j-array are completed */ 5313 i = nrecvs; 5314 while (i--) { 5315 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5316 } 5317 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5318 } else if (scall == MAT_REUSE_MATRIX) { 5319 sstartsj = *startsj_s; 5320 rstartsj = *startsj_r; 5321 bufa = *bufa_ptr; 5322 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5323 b_otha = b_oth->a; 5324 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5325 5326 /* a-array */ 5327 /*---------*/ 5328 /* post receives of a-array */ 5329 for (i=0; i<nrecvs; i++) { 5330 nrows = rstartsj[i+1]-rstartsj[i]; /* length 
of the msg received */ 5331 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5332 } 5333 5334 /* pack the outgoing message a-array */ 5335 k = 0; 5336 for (i=0; i<nsends; i++) { 5337 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5338 bufA = bufa+sstartsj[i]; 5339 for (j=0; j<nrows; j++) { 5340 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5341 for (ll=0; ll<sbs; ll++) { 5342 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5343 for (l=0; l<ncols; l++) { 5344 *bufA++ = vals[l]; 5345 } 5346 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5347 } 5348 } 5349 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5350 } 5351 /* recvs and sends of a-array are completed */ 5352 i = nrecvs; 5353 while (i--) { 5354 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5355 } 5356 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5357 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5358 5359 if (scall == MAT_INITIAL_MATRIX) { 5360 /* put together the new matrix */ 5361 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5362 5363 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5364 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5365 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5366 b_oth->free_a = PETSC_TRUE; 5367 b_oth->free_ij = PETSC_TRUE; 5368 b_oth->nonew = 0; 5369 5370 ierr = PetscFree(bufj);CHKERRQ(ierr); 5371 if (!startsj_s || !bufa_ptr) { 5372 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5373 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5374 } else { 5375 *startsj_s = sstartsj; 5376 *startsj_r = rstartsj; 5377 *bufa_ptr = bufa; 5378 } 5379 } 5380 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5381 PetscFunctionReturn(0); 5382 } 5383 5384 /*@C 5385 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5386 5387 Not Collective 5388 5389 Input Parameters: 5390 . A - The matrix in mpiaij format 5391 5392 Output Parameter: 5393 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5394 . 
colmap - A map from global column index to local index into lvec 5395 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5396 5397 Level: developer 5398 5399 @*/ 5400 #if defined(PETSC_USE_CTABLE) 5401 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5402 #else 5403 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5404 #endif 5405 { 5406 Mat_MPIAIJ *a; 5407 5408 PetscFunctionBegin; 5409 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5410 PetscValidPointer(lvec, 2); 5411 PetscValidPointer(colmap, 3); 5412 PetscValidPointer(multScatter, 4); 5413 a = (Mat_MPIAIJ*) A->data; 5414 if (lvec) *lvec = a->lvec; 5415 if (colmap) *colmap = a->colmap; 5416 if (multScatter) *multScatter = a->Mvctx; 5417 PetscFunctionReturn(0); 5418 } 5419 5420 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5421 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5422 #if defined(PETSC_HAVE_MKL_SPARSE) 5423 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5424 #endif 5425 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5426 #if defined(PETSC_HAVE_ELEMENTAL) 5427 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5428 #endif 5429 #if defined(PETSC_HAVE_HYPRE) 5430 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5431 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5432 #endif 5433 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5434 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5435 5436 /* 5437 Computes (B'*A')' since computing B*A directly is untenable 5438 5439 n p p 5440 ( ) ( ) ( ) 5441 m ( A ) * n ( B ) = m ( C ) 5442 ( ) ( ) ( ) 5443 5444 */ 5445 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5446 { 5447 PetscErrorCode ierr; 5448 Mat At,Bt,Ct; 5449 5450 PetscFunctionBegin; 5451 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5452 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5453 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5454 ierr = MatDestroy(&At);CHKERRQ(ierr); 5455 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5456 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5457 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5458 PetscFunctionReturn(0); 5459 } 5460 5461 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5462 { 5463 PetscErrorCode ierr; 5464 PetscInt m=A->rmap->n,n=B->cmap->n; 5465 Mat Cmat; 5466 5467 PetscFunctionBegin; 5468 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5469 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5470 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5471 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5472 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5473 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5474 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5475 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5476 5477 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5478 5479 *C = Cmat; 5480 PetscFunctionReturn(0); 5481 } 5482 5483 /* 
----------------------------------------------------------------*/ 5484 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5485 { 5486 PetscErrorCode ierr; 5487 5488 PetscFunctionBegin; 5489 if (scall == MAT_INITIAL_MATRIX) { 5490 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5491 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5492 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5493 } 5494 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5495 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5496 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5497 PetscFunctionReturn(0); 5498 } 5499 5500 /*MC 5501 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5502 5503 Options Database Keys: 5504 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5505 5506 Level: beginner 5507 5508 .seealso: MatCreateAIJ() 5509 M*/ 5510 5511 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5512 { 5513 Mat_MPIAIJ *b; 5514 PetscErrorCode ierr; 5515 PetscMPIInt size; 5516 5517 PetscFunctionBegin; 5518 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5519 5520 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5521 B->data = (void*)b; 5522 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5523 B->assembled = PETSC_FALSE; 5524 B->insertmode = NOT_SET_VALUES; 5525 b->size = size; 5526 5527 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5528 5529 /* build cache for off array entries formed */ 5530 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5531 5532 b->donotstash = PETSC_FALSE; 5533 b->colmap = 0; 5534 b->garray = 0; 5535 b->roworiented = PETSC_TRUE; 5536 5537 /* stuff used for matrix vector multiply */ 5538 b->lvec = NULL; 5539 b->Mvctx = NULL; 5540 5541 /* stuff for MatGetRow() */ 5542 b->rowindices = 0; 5543 b->rowvalues = 0; 5544 b->getrowactive = PETSC_FALSE; 5545 5546 /* flexible pointer used in CUSP/CUSPARSE classes */ 5547 b->spptr = NULL; 5548 5549 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5550 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5551 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5552 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5553 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5554 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5555 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5556 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5557 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5558 #if defined(PETSC_HAVE_MKL_SPARSE) 5559 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 
5560 #endif 5561 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5562 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5563 #if defined(PETSC_HAVE_ELEMENTAL) 5564 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5565 #endif 5566 #if defined(PETSC_HAVE_HYPRE) 5567 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5568 #endif 5569 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5570 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5571 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5572 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5573 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5574 #if defined(PETSC_HAVE_HYPRE) 5575 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5576 #endif 5577 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5578 PetscFunctionReturn(0); 5579 } 5580 5581 /*@C 5582 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5583 and "off-diagonal" part of the matrix in CSR format. 5584 5585 Collective on MPI_Comm 5586 5587 Input Parameters: 5588 + comm - MPI communicator 5589 . m - number of local rows (Cannot be PETSC_DECIDE) 5590 . n - This value should be the same as the local size used in creating the 5591 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5592 calculated if N is given) For square matrices n is almost always m. 5593 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5594 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5595 . i - row indices for "diagonal" portion of matrix 5596 . j - column indices 5597 . a - matrix values 5598 . oi - row indices for "off-diagonal" portion of matrix 5599 . oj - column indices 5600 - oa - matrix values 5601 5602 Output Parameter: 5603 . mat - the matrix 5604 5605 Level: advanced 5606 5607 Notes: 5608 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5609 must free the arrays once the matrix has been destroyed and not before. 5610 5611 The i and j indices are 0 based 5612 5613 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5614 5615 This sets local rows and cannot be used to set off-processor values. 5616 5617 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5618 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5619 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5620 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5621 keep track of the underlying array. 
Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5622 communication if it is known that only local entries will be set. 5623 5624 .keywords: matrix, aij, compressed row, sparse, parallel 5625 5626 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5627 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5628 @*/ 5629 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5630 { 5631 PetscErrorCode ierr; 5632 Mat_MPIAIJ *maij; 5633 5634 PetscFunctionBegin; 5635 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5636 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5637 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5638 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5639 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5640 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5641 maij = (Mat_MPIAIJ*) (*mat)->data; 5642 5643 (*mat)->preallocated = PETSC_TRUE; 5644 5645 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5646 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5647 5648 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5649 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5650 5651 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5652 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5653 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5654 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5655 5656 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5657 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5658 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5659 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5660 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5661 PetscFunctionReturn(0); 5662 } 5663 5664 /* 5665 Special version for direct calls from Fortran 5666 */ 5667 #include <petsc/private/fortranimpl.h> 5668 5669 /* Change these macros so can be used in void function */ 5670 #undef CHKERRQ 5671 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5672 #undef SETERRQ2 5673 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5674 #undef SETERRQ3 5675 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5676 #undef SETERRQ 5677 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5678 5679 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5680 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5681 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5682 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5683 #else 5684 #endif 5685 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5686 { 5687 Mat mat = *mmat; 5688 PetscInt m = *mm, n = *mn; 5689 InsertMode addv = *maddv; 5690 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5691 PetscScalar value; 5692 PetscErrorCode ierr; 5693 5694 MatCheckPreallocated(mat,1); 5695 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5696 5697 #if 
defined(PETSC_USE_DEBUG) 5698 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5699 #endif 5700 { 5701 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5702 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5703 PetscBool roworiented = aij->roworiented; 5704 5705 /* Some Variables required in the macro */ 5706 Mat A = aij->A; 5707 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5708 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5709 MatScalar *aa = a->a; 5710 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5711 Mat B = aij->B; 5712 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5713 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5714 MatScalar *ba = b->a; 5715 5716 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5717 PetscInt nonew = a->nonew; 5718 MatScalar *ap1,*ap2; 5719 5720 PetscFunctionBegin; 5721 for (i=0; i<m; i++) { 5722 if (im[i] < 0) continue; 5723 #if defined(PETSC_USE_DEBUG) 5724 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5725 #endif 5726 if (im[i] >= rstart && im[i] < rend) { 5727 row = im[i] - rstart; 5728 lastcol1 = -1; 5729 rp1 = aj + ai[row]; 5730 ap1 = aa + ai[row]; 5731 rmax1 = aimax[row]; 5732 nrow1 = ailen[row]; 5733 low1 = 0; 5734 high1 = nrow1; 5735 lastcol2 = -1; 5736 rp2 = bj + bi[row]; 5737 ap2 = ba + bi[row]; 5738 rmax2 = bimax[row]; 5739 nrow2 = bilen[row]; 5740 low2 = 0; 5741 high2 = nrow2; 5742 5743 for (j=0; j<n; j++) { 5744 if (roworiented) value = v[i*n+j]; 5745 else value = v[i+j*m]; 5746 if (in[j] >= cstart && in[j] < cend) { 5747 col = in[j] - cstart; 5748 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5749 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5750 } else if (in[j] < 0) continue; 5751 #if defined(PETSC_USE_DEBUG) 5752 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5753 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5754 #endif 5755 else { 5756 if (mat->was_assembled) { 5757 if (!aij->colmap) { 5758 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5759 } 5760 #if defined(PETSC_USE_CTABLE) 5761 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5762 col--; 5763 #else 5764 col = aij->colmap[in[j]] - 1; 5765 #endif 5766 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5767 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5768 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5769 col = in[j]; 5770 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5771 B = aij->B; 5772 b = (Mat_SeqAIJ*)B->data; 5773 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5774 rp2 = bj + bi[row]; 5775 ap2 = ba + bi[row]; 5776 rmax2 = bimax[row]; 5777 nrow2 = bilen[row]; 5778 low2 = 0; 5779 high2 = nrow2; 5780 bm = aij->B->rmap->n; 5781 ba = b->a; 5782 } 5783 } else col = in[j]; 5784 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5785 } 5786 } 5787 } else if (!aij->donotstash) { 5788 if (roworiented) { 5789 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && 
(addv == ADD_VALUES)));CHKERRQ(ierr); 5790 } else { 5791 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5792 } 5793 } 5794 } 5795 } 5796 PetscFunctionReturnVoid(); 5797 } 5798 5799
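/*
   Example (an illustrative usage sketch only; it is not part of this source file and is not compiled):
   assembling a parallel AIJ matrix with MatCreateAIJ() and MatSetValues(). The global size N, the
   tridiagonal stencil, and the preallocation estimates (3 diagonal-block and 2 off-diagonal-block
   nonzeros per row) are made-up example values.

     #include <petscmat.h>

     int main(int argc,char **argv)
     {
       Mat            A;
       PetscInt       i,j,rstart,rend,N = 100;
       PetscScalar    v;
       PetscErrorCode ierr;

       ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
       ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,3,NULL,2,NULL,&A);CHKERRQ(ierr);
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (i=rstart; i<rend; i++) {
         v = 2.0; ierr = MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
         if (i > 0)   {j = i-1; v = -1.0; ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);}
         if (i < N-1) {j = i+1; v = -1.0; ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);}
       }
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatDestroy(&A);CHKERRQ(ierr);
       ierr = PetscFinalize();
       return ierr;
     }
*/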