#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity (a usage sketch appears in a comment below).

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL;
  this type also automatically switches over to using inode routines when enough consecutive rows with an
  identical nonzero structure exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
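  /*
     Illustrative usage sketch for the MATAIJ/MATAIJCRL man pages near the top of this file (a comment added
     for exposition only, not part of the library source). The recommended pattern calls both preallocation
     routines so the same code runs on any communicator size; the matrix size and nonzero counts below are
     made-up example values.

         Mat mat;
         ierr = MatCreate(PETSC_COMM_WORLD,&mat);CHKERRQ(ierr);
         ierr = MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
         ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
         ierr = MatSeqAIJSetPreallocation(mat,5,NULL);CHKERRQ(ierr);        used on a single-process communicator
         ierr = MatMPIAIJSetPreallocation(mat,5,NULL,2,NULL);CHKERRQ(ierr); used on a multi-process communicator
         followed by MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual
  */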
} 114 ok2:; 115 } 116 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 121 { 122 PetscErrorCode ierr; 123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 124 125 PetscFunctionBegin; 126 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 127 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 128 } else { 129 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 130 } 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 135 { 136 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 137 PetscErrorCode ierr; 138 PetscInt i,rstart,nrows,*rows; 139 140 PetscFunctionBegin; 141 *zrows = NULL; 142 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 143 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 144 for (i=0; i<nrows; i++) rows[i] += rstart; 145 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 150 { 151 PetscErrorCode ierr; 152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 153 PetscInt i,n,*garray = aij->garray; 154 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 155 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 156 PetscReal *work; 157 158 PetscFunctionBegin; 159 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 160 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 161 if (type == NORM_2) { 162 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 163 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 164 } 165 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 166 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 167 } 168 } else if (type == NORM_1) { 169 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 170 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 171 } 172 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 173 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 174 } 175 } else if (type == NORM_INFINITY) { 176 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 177 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 178 } 179 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 180 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 181 } 182 183 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 184 if (type == NORM_INFINITY) { 185 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 186 } else { 187 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 188 } 189 ierr = PetscFree(work);CHKERRQ(ierr); 190 if (type == NORM_2) { 191 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 197 { 198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 199 IS sis,gis; 200 PetscErrorCode ierr; 201 const PetscInt *isis,*igis; 202 PetscInt n,*iis,nsis,ngis,rstart,i; 203 204 PetscFunctionBegin; 205 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 206 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 207 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 208 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 209 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 210 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 211 212 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 213 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 214 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 215 n = ngis + nsis; 216 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 217 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 218 for (i=0; i<n; i++) iis[i] += rstart; 219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 220 221 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 222 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 223 ierr = ISDestroy(&sis);CHKERRQ(ierr); 224 ierr = ISDestroy(&gis);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 /* 229 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 230 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 231 232 Only for square matrices 233 234 Used by a preconditioner, hence PETSC_EXTERN 235 */ 236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 237 { 238 PetscMPIInt rank,size; 239 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 240 PetscErrorCode ierr; 241 Mat mat; 242 Mat_SeqAIJ *gmata; 243 PetscMPIInt tag; 244 MPI_Status status; 245 PetscBool aij; 246 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 247 248 PetscFunctionBegin; 249 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 250 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 251 if (!rank) { 252 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 253 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 254 } 255 if (reuse == MAT_INITIAL_MATRIX) { 256 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 257 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 258 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 259 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 260 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 261 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 262 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 263 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 264 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 265 266 rowners[0] = 0; 267 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 268 rstart = rowners[rank]; 269 rend = rowners[rank+1]; 270 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 271 if (!rank) { 272 gmata = (Mat_SeqAIJ*) gmat->data; 273 /* send row lengths to all processors */ 274 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 275 for (i=1; i<size; i++) { 276 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 277 } 278 /* determine number diagonal and off-diagonal counts */ 279 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 280 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 281 jj = 0; 282 for (i=0; i<m; i++) { 283 for (j=0; j<dlens[i]; j++) { 284 if (gmata->j[jj] < rstart) ld[i]++; 285 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 286 jj++; 287 } 288 } 289 /* send column indices to other processes */ 290 for (i=1; i<size; i++) { 291 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 292 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 294 } 295 296 /* send numerical values to other processes */ 297 for (i=1; i<size; i++) { 298 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 299 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 300 } 301 gmataa = gmata->a; 302 gmataj = gmata->j; 303 304 } else { 305 /* receive row lengths */ 306 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 307 /* receive column indices */ 308 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 309 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 310 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* determine number diagonal and off-diagonal counts */ 312 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 313 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 314 jj = 0; 315 for (i=0; i<m; i++) { 316 for (j=0; j<dlens[i]; j++) { 317 if (gmataj[jj] < rstart) ld[i]++; 318 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 319 jj++; 320 } 321 } 322 /* receive numerical values */ 323 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 324 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 325 } 326 /* set preallocation */ 327 for (i=0; i<m; i++) { 328 dlens[i] -= olens[i]; 329 } 330 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 331 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 332 333 for (i=0; i<m; i++) { 334 dlens[i] += olens[i]; 335 } 336 cnt = 0; 337 for (i=0; i<m; i++) { 338 row = rstart + i; 339 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 340 cnt += dlens[i]; 341 } 342 if (rank) { 343 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 344 } 345 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 346 ierr = PetscFree(rowners);CHKERRQ(ierr); 347 348 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 349 350 *inmat = mat; 351 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 352 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 353 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 354 mat = *inmat; 355 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 356 if (!rank) { 357 /* send numerical values to other processes */ 358 gmata = (Mat_SeqAIJ*) gmat->data; 359 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 360 gmataa = gmata->a; 361 for (i=1; i<size; i++) { 362 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 363 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 364 } 365 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 366 } else { 367 /* receive numerical values from process 0*/ 368 nz = Ad->nz + Ao->nz; 369 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 370 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 371 } 372 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 373 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 374 ad = Ad->a; 375 ao = Ao->a; 376 if (mat->rmap->n) { 377 i = 0; 378 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 379 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 380 } 381 for (i=1; i<mat->rmap->n; i++) { 
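      /* Note: for each local row the value stream received from process 0 is ordered by global column:
         [off-diagonal entries left of the diagonal block | diagonal-block entries | off-diagonal entries
         right of the diagonal block], and ld[i] records how many "left" off-diagonal entries row i has.
         The first copy below therefore takes the trailing off-diagonal values of row i-1 together with the
         leading off-diagonal values of row i into B (ao), and the second copy takes the diagonal-block
         values of row i into A (ad). */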
382 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 i--; 386 if (mat->rmap->n) { 387 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 388 } 389 if (rank) { 390 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 391 } 392 } 393 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 /* 399 Local utility routine that creates a mapping from the global column 400 number to the local number in the off-diagonal part of the local 401 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 402 a slightly higher hash table cost; without it it is not scalable (each processor 403 has an order N integer array but is fast to acess. 404 */ 405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 406 { 407 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscInt n = aij->B->cmap->n,i; 410 411 PetscFunctionBegin; 412 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 413 #if defined(PETSC_USE_CTABLE) 414 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 415 for (i=0; i<n; i++) { 416 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 417 } 418 #else 419 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 420 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 421 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 422 #endif 423 PetscFunctionReturn(0); 424 } 425 426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 427 { \ 428 if (col <= lastcol1) low1 = 0; \ 429 else high1 = nrow1; \ 430 lastcol1 = col;\ 431 while (high1-low1 > 5) { \ 432 t = (low1+high1)/2; \ 433 if (rp1[t] > col) high1 = t; \ 434 else low1 = t; \ 435 } \ 436 for (_i=low1; _i<high1; _i++) { \ 437 if (rp1[_i] > col) break; \ 438 if (rp1[_i] == col) { \ 439 if (addv == ADD_VALUES) ap1[_i] += value; \ 440 else ap1[_i] = value; \ 441 goto a_noinsert; \ 442 } \ 443 } \ 444 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 445 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 446 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 447 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 448 N = nrow1++ - 1; a->nz++; high1++; \ 449 /* shift up all the later entries in this row */ \ 450 for (ii=N; ii>=_i; ii--) { \ 451 rp1[ii+1] = rp1[ii]; \ 452 ap1[ii+1] = ap1[ii]; \ 453 } \ 454 rp1[_i] = col; \ 455 ap1[_i] = value; \ 456 A->nonzerostate++;\ 457 a_noinsert: ; \ 458 ailen[row] = nrow1; \ 459 } 460 461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 462 { \ 463 if (col <= lastcol2) low2 = 0; \ 464 else high2 = nrow2; \ 465 lastcol2 = col; \ 466 while (high2-low2 > 5) { \ 467 t = (low2+high2)/2; \ 468 if (rp2[t] > col) high2 = t; \ 469 else low2 = t; \ 470 } \ 471 for (_i=low2; _i<high2; _i++) { \ 472 if (rp2[_i] > col) break; \ 473 if (rp2[_i] == col) { \ 474 if (addv == ADD_VALUES) ap2[_i] += value; \ 475 else ap2[_i] 
= value; \ 476 goto b_noinsert; \ 477 } \ 478 } \ 479 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 480 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 481 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 482 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 483 N = nrow2++ - 1; b->nz++; high2++; \ 484 /* shift up all the later entries in this row */ \ 485 for (ii=N; ii>=_i; ii--) { \ 486 rp2[ii+1] = rp2[ii]; \ 487 ap2[ii+1] = ap2[ii]; \ 488 } \ 489 rp2[_i] = col; \ 490 ap2[_i] = value; \ 491 B->nonzerostate++; \ 492 b_noinsert: ; \ 493 bilen[row] = nrow2; \ 494 } 495 496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 497 { 498 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 499 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 500 PetscErrorCode ierr; 501 PetscInt l,*garray = mat->garray,diag; 502 503 PetscFunctionBegin; 504 /* code only works for square matrices A */ 505 506 /* find size of row to the left of the diagonal part */ 507 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 508 row = row - diag; 509 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 510 if (garray[b->j[b->i[row]+l]] > diag) break; 511 } 512 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 513 514 /* diagonal part */ 515 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* right of diagonal part */ 518 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 523 { 524 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 525 PetscScalar value; 526 PetscErrorCode ierr; 527 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 528 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 529 PetscBool roworiented = aij->roworiented; 530 531 /* Some Variables required in the macro */ 532 Mat A = aij->A; 533 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 534 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 535 MatScalar *aa = a->a; 536 PetscBool ignorezeroentries = a->ignorezeroentries; 537 Mat B = aij->B; 538 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 539 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 540 MatScalar *ba = b->a; 541 542 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 543 PetscInt nonew; 544 MatScalar *ap1,*ap2; 545 546 PetscFunctionBegin; 547 for (i=0; i<m; i++) { 548 if (im[i] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 551 #endif 552 if (im[i] >= rstart && im[i] < rend) { 553 row = im[i] - rstart; 554 lastcol1 = -1; 555 rp1 = aj + ai[row]; 556 ap1 = aa + ai[row]; 557 rmax1 = aimax[row]; 558 nrow1 = ailen[row]; 559 low1 = 0; 560 high1 = nrow1; 561 lastcol2 = -1; 562 rp2 = bj + bi[row]; 563 ap2 = ba + bi[row]; 564 rmax2 = bimax[row]; 565 nrow2 = bilen[row]; 566 low2 = 0; 567 high2 = nrow2; 568 569 for (j=0; j<n; j++) { 570 if (roworiented) value = v[i*n+j]; 571 else value = v[i+j*m]; 
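        /* Columns that fall in the owned block [cstart,cend) are inserted into the diagonal block aij->A
           using a local column index; other (nonnegative, in-range) columns go into the off-diagonal block
           aij->B. If the matrix was assembled before, the global column is translated to a local index
           through aij->colmap (built on first use); otherwise B keeps global column indices until assembly
           compresses them. */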
572 if (in[j] >= cstart && in[j] < cend) { 573 col = in[j] - cstart; 574 nonew = a->nonew; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 576 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 577 } else if (in[j] < 0) continue; 578 #if defined(PETSC_USE_DEBUG) 579 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 580 #endif 581 else { 582 if (mat->was_assembled) { 583 if (!aij->colmap) { 584 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 585 } 586 #if defined(PETSC_USE_CTABLE) 587 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 593 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ*)B->data; 598 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 599 rp2 = bj + bi[row]; 600 ap2 = ba + bi[row]; 601 rmax2 = bimax[row]; 602 nrow2 = bilen[row]; 603 low2 = 0; 604 high2 = nrow2; 605 bm = aij->B->rmap->n; 606 ba = b->a; 607 } else if (col < 0) { 608 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 609 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 610 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 611 } 612 } else col = in[j]; 613 nonew = b->nonew; 614 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 615 } 616 } 617 } else { 618 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 619 if (!aij->donotstash) { 620 mat->assembled = PETSC_FALSE; 621 if (roworiented) { 622 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 623 } else { 624 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 625 } 626 } 627 } 628 } 629 PetscFunctionReturn(0); 630 } 631 632 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 633 { 634 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 635 PetscErrorCode ierr; 636 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 637 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 638 639 PetscFunctionBegin; 640 for (i=0; i<m; i++) { 641 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 642 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 643 if (idxm[i] >= rstart && idxm[i] < rend) { 644 row = idxm[i] - rstart; 645 for (j=0; j<n; j++) { 646 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 647 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 648 if (idxn[j] >= cstart && idxn[j] < cend) { 649 col = idxn[j] - cstart; 650 ierr = 
MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 651 } else { 652 if (!aij->colmap) { 653 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 654 } 655 #if defined(PETSC_USE_CTABLE) 656 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 657 col--; 658 #else 659 col = aij->colmap[idxn[j]] - 1; 660 #endif 661 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 662 else { 663 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 664 } 665 } 666 } 667 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 668 } 669 PetscFunctionReturn(0); 670 } 671 672 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 673 674 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 675 { 676 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 677 PetscErrorCode ierr; 678 PetscInt nstash,reallocs; 679 680 PetscFunctionBegin; 681 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 682 683 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 684 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 685 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } 688 689 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 690 { 691 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 692 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 693 PetscErrorCode ierr; 694 PetscMPIInt n; 695 PetscInt i,j,rstart,ncols,flg; 696 PetscInt *row,*col; 697 PetscBool other_disassembled; 698 PetscScalar *val; 699 700 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 701 702 PetscFunctionBegin; 703 if (!aij->donotstash && !mat->nooffprocentries) { 704 while (1) { 705 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 706 if (!flg) break; 707 708 for (i=0; i<n; ) { 709 /* Now identify the consecutive vals belonging to the same row */ 710 for (j=i,rstart=row[j]; j<n; j++) { 711 if (row[j] != rstart) break; 712 } 713 if (j < n) ncols = j-i; 714 else ncols = n-i; 715 /* Now assemble all these values with a single function call */ 716 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 717 718 i = j; 719 } 720 } 721 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 722 } 723 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 724 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 725 726 /* determine if any processor has disassembled, if so we must 727 also disassemble ourselfs, in order that we may reassemble. 
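     (Explanatory note: other_disassembled below is the MPI_PROD, i.e. logical AND, of was_assembled over
     all ranks, so it is false as soon as any rank is no longer assembled; a rank that is still assembled
     while some other rank has disassembled must then disassemble here as well so that every rank can
     reassemble consistently.)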
*/ 728 /* 729 if nonzero structure of submatrix B cannot change then we know that 730 no processor disassembled thus we can skip this stuff 731 */ 732 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 733 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 734 if (mat->was_assembled && !other_disassembled) { 735 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 736 } 737 } 738 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 739 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 740 } 741 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 742 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 743 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 744 745 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 746 747 aij->rowvalues = 0; 748 749 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 750 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 751 752 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 753 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 754 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 755 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 756 } 757 PetscFunctionReturn(0); 758 } 759 760 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 761 { 762 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 763 PetscErrorCode ierr; 764 765 PetscFunctionBegin; 766 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 767 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 772 { 773 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 774 PetscInt *lrows; 775 PetscInt r, len; 776 PetscErrorCode ierr; 777 778 PetscFunctionBegin; 779 /* get locally owned rows */ 780 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 781 /* fix right hand side if needed */ 782 if (x && b) { 783 const PetscScalar *xx; 784 PetscScalar *bb; 785 786 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 787 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 788 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 789 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 790 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 791 } 792 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 793 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 794 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 795 PetscBool cong; 796 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 797 if (cong) A->congruentlayouts = 1; 798 else A->congruentlayouts = 0; 799 } 800 if ((diag != 0.0) && A->congruentlayouts) { 801 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 802 } else if (diag != 0.0) { 803 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 804 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 805 for (r = 0; r < len; ++r) { 806 const PetscInt row = lrows[r] + A->rmap->rstart; 807 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 808 } 809 
ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 810 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 811 } else { 812 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 813 } 814 ierr = PetscFree(lrows);CHKERRQ(ierr); 815 816 /* only change matrix nonzero state if pattern was allowed to be changed */ 817 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 818 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 819 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 820 } 821 PetscFunctionReturn(0); 822 } 823 824 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 825 { 826 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 827 PetscErrorCode ierr; 828 PetscMPIInt n = A->rmap->n; 829 PetscInt i,j,r,m,p = 0,len = 0; 830 PetscInt *lrows,*owners = A->rmap->range; 831 PetscSFNode *rrows; 832 PetscSF sf; 833 const PetscScalar *xx; 834 PetscScalar *bb,*mask; 835 Vec xmask,lmask; 836 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 837 const PetscInt *aj, *ii,*ridx; 838 PetscScalar *aa; 839 840 PetscFunctionBegin; 841 /* Create SF where leaves are input rows and roots are owned rows */ 842 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 843 for (r = 0; r < n; ++r) lrows[r] = -1; 844 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 845 for (r = 0; r < N; ++r) { 846 const PetscInt idx = rows[r]; 847 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 848 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 849 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 850 } 851 rrows[r].rank = p; 852 rrows[r].index = rows[r] - owners[p]; 853 } 854 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 855 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 856 /* Collect flags for rows to be zeroed */ 857 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 858 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 859 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 860 /* Compress and put in row numbers */ 861 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 862 /* zero diagonal part of matrix */ 863 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 864 /* handle off diagonal part of matrix */ 865 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 866 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 867 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 868 for (i=0; i<len; i++) bb[lrows[i]] = 1; 869 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 870 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 873 if (x) { 874 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 875 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 876 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 877 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 878 } 879 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 880 /* remove zeroed rows of off diagonal matrix */ 881 ii = aij->i; 882 for (i=0; i<len; i++) { 883 ierr = PetscMemzero(aij->a + 
ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 884 } 885 /* loop over all elements of off process part of matrix zeroing removed columns*/ 886 if (aij->compressedrow.use) { 887 m = aij->compressedrow.nrows; 888 ii = aij->compressedrow.i; 889 ridx = aij->compressedrow.rindex; 890 for (i=0; i<m; i++) { 891 n = ii[i+1] - ii[i]; 892 aj = aij->j + ii[i]; 893 aa = aij->a + ii[i]; 894 895 for (j=0; j<n; j++) { 896 if (PetscAbsScalar(mask[*aj])) { 897 if (b) bb[*ridx] -= *aa*xx[*aj]; 898 *aa = 0.0; 899 } 900 aa++; 901 aj++; 902 } 903 ridx++; 904 } 905 } else { /* do not use compressed row format */ 906 m = l->B->rmap->n; 907 for (i=0; i<m; i++) { 908 n = ii[i+1] - ii[i]; 909 aj = aij->j + ii[i]; 910 aa = aij->a + ii[i]; 911 for (j=0; j<n; j++) { 912 if (PetscAbsScalar(mask[*aj])) { 913 if (b) bb[i] -= *aa*xx[*aj]; 914 *aa = 0.0; 915 } 916 aa++; 917 aj++; 918 } 919 } 920 } 921 if (x) { 922 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 923 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 924 } 925 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 926 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 927 ierr = PetscFree(lrows);CHKERRQ(ierr); 928 929 /* only change matrix nonzero state if pattern was allowed to be changed */ 930 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 931 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 932 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 933 } 934 PetscFunctionReturn(0); 935 } 936 937 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 938 { 939 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 940 PetscErrorCode ierr; 941 PetscInt nt; 942 VecScatter Mvctx = a->Mvctx; 943 944 PetscFunctionBegin; 945 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 946 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 947 948 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 949 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 950 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 951 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 952 PetscFunctionReturn(0); 953 } 954 955 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 956 { 957 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 958 PetscErrorCode ierr; 959 960 PetscFunctionBegin; 961 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 965 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 966 { 967 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 968 PetscErrorCode ierr; 969 VecScatter Mvctx = a->Mvctx; 970 971 PetscFunctionBegin; 972 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 973 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 974 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 975 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 977 PetscFunctionReturn(0); 978 } 979 980 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 981 { 982 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 983 PetscErrorCode ierr; 984 PetscBool merged; 985 986 PetscFunctionBegin; 987 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 988 /* do nondiagonal part */ 989 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 990 if (!merged) { 991 /* send it on its way */ 
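    /* The reverse-mode scatter started below adds the off-process contributions of B^T*x (held in a->lvec)
       into yy, while the local product A^T*x is computed between the Begin and End calls, overlapping
       communication with computation. */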
992 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 993 /* do local part */ 994 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 995 /* receive remote parts: note this assumes the values are not actually */ 996 /* added in yy until the next line, */ 997 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 998 } else { 999 /* do local part */ 1000 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1001 /* send it on its way */ 1002 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1003 /* values actually were received in the Begin() but we need to call this nop */ 1004 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1005 } 1006 PetscFunctionReturn(0); 1007 } 1008 1009 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1010 { 1011 MPI_Comm comm; 1012 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1013 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1014 IS Me,Notme; 1015 PetscErrorCode ierr; 1016 PetscInt M,N,first,last,*notme,i; 1017 PetscMPIInt size; 1018 1019 PetscFunctionBegin; 1020 /* Easy test: symmetric diagonal block */ 1021 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1022 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1023 if (!*f) PetscFunctionReturn(0); 1024 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1025 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1026 if (size == 1) PetscFunctionReturn(0); 1027 1028 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1029 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1030 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1031 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1032 for (i=0; i<first; i++) notme[i] = i; 1033 for (i=last; i<M; i++) notme[i-last+first] = i; 1034 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1035 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1036 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1037 Aoff = Aoffs[0]; 1038 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1039 Boff = Boffs[0]; 1040 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1041 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1042 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1043 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1044 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1045 ierr = PetscFree(notme);CHKERRQ(ierr); 1046 PetscFunctionReturn(0); 1047 } 1048 1049 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1050 { 1051 PetscErrorCode ierr; 1052 1053 PetscFunctionBegin; 1054 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1059 { 1060 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1061 PetscErrorCode ierr; 1062 1063 PetscFunctionBegin; 1064 /* do nondiagonal part */ 1065 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1066 /* send it on its way */ 1067 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1068 /* do local part */ 1069 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1070 /* receive remote parts */ 1071 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1072 
PetscFunctionReturn(0); 1073 } 1074 1075 /* 1076 This only works correctly for square matrices where the subblock A->A is the 1077 diagonal block 1078 */ 1079 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1080 { 1081 PetscErrorCode ierr; 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 1084 PetscFunctionBegin; 1085 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1086 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1087 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1092 { 1093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1098 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1103 { 1104 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1105 PetscErrorCode ierr; 1106 1107 PetscFunctionBegin; 1108 #if defined(PETSC_USE_LOG) 1109 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1110 #endif 1111 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1112 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1113 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1114 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1115 #if defined(PETSC_USE_CTABLE) 1116 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1117 #else 1118 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1119 #endif 1120 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1121 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1122 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1123 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1124 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1125 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1126 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1127 1128 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1133 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1134 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1135 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1136 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1137 #if defined(PETSC_HAVE_ELEMENTAL) 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1139 #endif 1140 #if defined(PETSC_HAVE_HYPRE) 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1142 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1143 #endif 1144 PetscFunctionReturn(0); 1145 } 1146 1147 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1148 { 1149 Mat_MPIAIJ *aij 
= (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest number of nonzeros on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr =
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1221 if (!rank) { 1222 MPI_Status status; 1223 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1224 for (i=1; i<size; i++) { 1225 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1226 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1227 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1228 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1229 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1230 } 1231 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1232 } else { 1233 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1234 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1235 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1236 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1237 } 1238 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1239 1240 /* load up the local column values */ 1241 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1242 cnt = 0; 1243 for (i=0; i<mat->rmap->n; i++) { 1244 for (j=B->i[i]; j<B->i[i+1]; j++) { 1245 if (garray[B->j[j]] > cstart) break; 1246 column_values[cnt++] = B->a[j]; 1247 } 1248 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1249 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1250 } 1251 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1252 1253 /* store the column values to the file */ 1254 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1255 if (!rank) { 1256 MPI_Status status; 1257 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1258 for (i=1; i<size; i++) { 1259 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1260 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1261 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1262 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1263 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1264 } 1265 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1266 } else { 1267 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1268 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1269 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1270 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1271 } 1272 ierr = PetscFree(column_values);CHKERRQ(ierr); 1273 1274 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1275 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #include <petscdraw.h> 1280 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1281 { 1282 
Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1283 PetscErrorCode ierr; 1284 PetscMPIInt rank = aij->rank,size = aij->size; 1285 PetscBool isdraw,iascii,isbinary; 1286 PetscViewer sviewer; 1287 PetscViewerFormat format; 1288 1289 PetscFunctionBegin; 1290 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1291 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1292 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1293 if (iascii) { 1294 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1295 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1296 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1297 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1298 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1299 for (i=0; i<(PetscInt)size; i++) { 1300 nmax = PetscMax(nmax,nz[i]); 1301 nmin = PetscMin(nmin,nz[i]); 1302 navg += nz[i]; 1303 } 1304 ierr = PetscFree(nz);CHKERRQ(ierr); 1305 navg = navg/size; 1306 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1310 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1311 MatInfo info; 1312 PetscBool inodes; 1313 1314 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1315 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1316 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1317 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1318 if (!inodes) { 1319 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1320 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1321 } else { 1322 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1323 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1324 } 1325 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1326 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1327 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1328 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1329 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1330 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1331 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1332 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1333 PetscFunctionReturn(0); 1334 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1335 PetscInt inodecount,inodelimit,*inodes; 1336 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1337 if (inodes) { 1338 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1339 } else { 1340 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1341 } 1342 PetscFunctionReturn(0); 1343 } else if 
(format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1344 PetscFunctionReturn(0); 1345 } 1346 } else if (isbinary) { 1347 if (size == 1) { 1348 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1349 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1350 } else { 1351 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1352 } 1353 PetscFunctionReturn(0); 1354 } else if (isdraw) { 1355 PetscDraw draw; 1356 PetscBool isnull; 1357 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1358 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1359 if (isnull) PetscFunctionReturn(0); 1360 } 1361 1362 { 1363 /* assemble the entire matrix onto first processor. */ 1364 Mat A; 1365 Mat_SeqAIJ *Aloc; 1366 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1367 MatScalar *a; 1368 1369 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1370 if (!rank) { 1371 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1372 } else { 1373 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1374 } 1375 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1376 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1377 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1378 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1379 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1380 1381 /* copy over the A part */ 1382 Aloc = (Mat_SeqAIJ*)aij->A->data; 1383 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1384 row = mat->rmap->rstart; 1385 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1386 for (i=0; i<m; i++) { 1387 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1388 row++; 1389 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1390 } 1391 aj = Aloc->j; 1392 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1393 1394 /* copy over the B part */ 1395 Aloc = (Mat_SeqAIJ*)aij->B->data; 1396 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1397 row = mat->rmap->rstart; 1398 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1399 ct = cols; 1400 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1401 for (i=0; i<m; i++) { 1402 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1403 row++; 1404 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1405 } 1406 ierr = PetscFree(ct);CHKERRQ(ierr); 1407 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1408 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1409 /* 1410 Everyone has to call to draw the matrix since the graphics waits are 1411 synchronized across all processors that share the PetscDraw object 1412 */ 1413 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1414 if (!rank) { 1415 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1416 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1417 } 1418 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1419 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1420 ierr = MatDestroy(&A);CHKERRQ(ierr); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1426 { 1427 PetscErrorCode ierr; 1428 PetscBool iascii,isdraw,issocket,isbinary; 1429 1430 PetscFunctionBegin; 1431 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1432 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1433 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1434 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1435 if (iascii || isdraw || isbinary || issocket) { 1436 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1437 } 1438 PetscFunctionReturn(0); 1439 } 1440 1441 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1442 { 1443 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1444 PetscErrorCode ierr; 1445 Vec bb1 = 0; 1446 PetscBool hasop; 1447 1448 PetscFunctionBegin; 1449 if (flag == SOR_APPLY_UPPER) { 1450 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1451 PetscFunctionReturn(0); 1452 } 1453 1454 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1455 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1456 } 1457 1458 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1461 its--; 1462 } 1463 1464 while (its--) { 1465 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1466 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1467 1468 /* update rhs: bb1 = bb - B*x */ 1469 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1470 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1471 1472 /* local sweep */ 1473 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1474 } 1475 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1476 if (flag & SOR_ZERO_INITIAL_GUESS) { 1477 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1478 its--; 1479 } 1480 while (its--) { 1481 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1482 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1483 1484 /* update rhs: bb1 = bb - B*x */ 1485 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1486 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1487 1488 /* local sweep */ 1489 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1490 } 1491 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1492 if (flag & SOR_ZERO_INITIAL_GUESS) { 1493 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1494 its--; 1495 } 1496 while (its--) { 1497 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1499 1500 /* update rhs: bb1 = bb - B*x */ 1501 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1502 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1503 1504 /* local sweep */ 1505 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1506 } 1507 } else if (flag & SOR_EISENSTAT) { 1508 Vec xx1; 1509 1510 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1511 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1512 1513 ierr = 
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1515 if (!mat->diag) { 1516 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1517 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1518 } 1519 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1520 if (hasop) { 1521 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1522 } else { 1523 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1524 } 1525 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1526 1527 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1528 1529 /* local sweep */ 1530 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1531 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1532 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1533 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1534 1535 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1536 1537 matin->factorerrortype = mat->A->factorerrortype; 1538 PetscFunctionReturn(0); 1539 } 1540 1541 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1542 { 1543 Mat aA,aB,Aperm; 1544 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1545 PetscScalar *aa,*ba; 1546 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1547 PetscSF rowsf,sf; 1548 IS parcolp = NULL; 1549 PetscBool done; 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1554 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1555 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1556 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1557 1558 /* Invert row permutation to find out where my rows should go */ 1559 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1560 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1561 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1562 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1563 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1564 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1565 1566 /* Invert column permutation to find out where my columns should go */ 1567 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1568 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1569 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1570 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1571 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1572 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1573 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1574 1575 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1576 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1577 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1578 1579 /* Find out where my gcols should go */ 1580 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1581 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1582 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1583 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1584 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1585 ierr = 
PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1586 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1587 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1588 1589 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1590 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1591 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1592 for (i=0; i<m; i++) { 1593 PetscInt row = rdest[i],rowner; 1594 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1595 for (j=ai[i]; j<ai[i+1]; j++) { 1596 PetscInt cowner,col = cdest[aj[j]]; 1597 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1598 if (rowner == cowner) dnnz[i]++; 1599 else onnz[i]++; 1600 } 1601 for (j=bi[i]; j<bi[i+1]; j++) { 1602 PetscInt cowner,col = gcdest[bj[j]]; 1603 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1604 if (rowner == cowner) dnnz[i]++; 1605 else onnz[i]++; 1606 } 1607 } 1608 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1609 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1610 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1611 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1612 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1613 1614 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1615 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1616 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1617 for (i=0; i<m; i++) { 1618 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1619 PetscInt j0,rowlen; 1620 rowlen = ai[i+1] - ai[i]; 1621 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1622 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1623 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1624 } 1625 rowlen = bi[i+1] - bi[i]; 1626 for (j0=j=0; j<rowlen; j0=j) { 1627 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1628 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1629 } 1630 } 1631 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1632 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1633 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1634 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1635 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1636 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1637 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1638 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1639 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1640 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1641 *B = Aperm; 1642 PetscFunctionReturn(0); 1643 } 1644 1645 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1646 { 1647 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1648 PetscErrorCode ierr; 1649 1650 PetscFunctionBegin; 1651 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1652 if (ghosts) *ghosts = aij->garray; 1653 PetscFunctionReturn(0); 1654 } 1655 1656 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1657 { 1658 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1659 
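  /* The five per-process counters (nz_used, nz_allocated, nz_unneeded, memory, mallocs) of the
     diagonal (A) and off-diagonal (B) blocks are summed into isend[] below and, for
     MAT_GLOBAL_MAX / MAT_GLOBAL_SUM, reduced across the matrix communicator. */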
Mat A = mat->A,B = mat->B; 1660 PetscErrorCode ierr; 1661 PetscReal isend[5],irecv[5]; 1662 1663 PetscFunctionBegin; 1664 info->block_size = 1.0; 1665 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1666 1667 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1668 isend[3] = info->memory; isend[4] = info->mallocs; 1669 1670 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1671 1672 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1673 isend[3] += info->memory; isend[4] += info->mallocs; 1674 if (flag == MAT_LOCAL) { 1675 info->nz_used = isend[0]; 1676 info->nz_allocated = isend[1]; 1677 info->nz_unneeded = isend[2]; 1678 info->memory = isend[3]; 1679 info->mallocs = isend[4]; 1680 } else if (flag == MAT_GLOBAL_MAX) { 1681 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1682 1683 info->nz_used = irecv[0]; 1684 info->nz_allocated = irecv[1]; 1685 info->nz_unneeded = irecv[2]; 1686 info->memory = irecv[3]; 1687 info->mallocs = irecv[4]; 1688 } else if (flag == MAT_GLOBAL_SUM) { 1689 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1690 1691 info->nz_used = irecv[0]; 1692 info->nz_allocated = irecv[1]; 1693 info->nz_unneeded = irecv[2]; 1694 info->memory = irecv[3]; 1695 info->mallocs = irecv[4]; 1696 } 1697 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1698 info->fill_ratio_needed = 0; 1699 info->factor_mallocs = 0; 1700 PetscFunctionReturn(0); 1701 } 1702 1703 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1704 { 1705 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 switch (op) { 1710 case MAT_NEW_NONZERO_LOCATIONS: 1711 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1712 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1713 case MAT_KEEP_NONZERO_PATTERN: 1714 case MAT_NEW_NONZERO_LOCATION_ERR: 1715 case MAT_USE_INODES: 1716 case MAT_IGNORE_ZERO_ENTRIES: 1717 MatCheckPreallocated(A,1); 1718 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1719 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1720 break; 1721 case MAT_ROW_ORIENTED: 1722 MatCheckPreallocated(A,1); 1723 a->roworiented = flg; 1724 1725 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1726 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1727 break; 1728 case MAT_NEW_DIAGONALS: 1729 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1730 break; 1731 case MAT_IGNORE_OFF_PROC_ENTRIES: 1732 a->donotstash = flg; 1733 break; 1734 case MAT_SPD: 1735 A->spd_set = PETSC_TRUE; 1736 A->spd = flg; 1737 if (flg) { 1738 A->symmetric = PETSC_TRUE; 1739 A->structurally_symmetric = PETSC_TRUE; 1740 A->symmetric_set = PETSC_TRUE; 1741 A->structurally_symmetric_set = PETSC_TRUE; 1742 } 1743 break; 1744 case MAT_SYMMETRIC: 1745 MatCheckPreallocated(A,1); 1746 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1747 break; 1748 case MAT_STRUCTURALLY_SYMMETRIC: 1749 MatCheckPreallocated(A,1); 1750 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1751 break; 1752 case MAT_HERMITIAN: 1753 MatCheckPreallocated(A,1); 1754 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1755 break; 1756 case MAT_SYMMETRY_ETERNAL: 1757 MatCheckPreallocated(A,1); 1758 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1759 break; 1760 case MAT_SUBMAT_SINGLEIS: 1761 A->submat_singleis = flg; 1762 break; 1763 case MAT_STRUCTURE_ONLY: 1764 /* The option is handled directly by MatSetOption() */ 1765 break; 
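  /* any option not handled above is rejected rather than silently ignored */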
1766 default: 1767 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1768 } 1769 PetscFunctionReturn(0); 1770 } 1771 1772 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1773 { 1774 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1775 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1776 PetscErrorCode ierr; 1777 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1778 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1779 PetscInt *cmap,*idx_p; 1780 1781 PetscFunctionBegin; 1782 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1783 mat->getrowactive = PETSC_TRUE; 1784 1785 if (!mat->rowvalues && (idx || v)) { 1786 /* 1787 allocate enough space to hold information from the longest row. 1788 */ 1789 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1790 PetscInt max = 1,tmp; 1791 for (i=0; i<matin->rmap->n; i++) { 1792 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1793 if (max < tmp) max = tmp; 1794 } 1795 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1796 } 1797 1798 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1799 lrow = row - rstart; 1800 1801 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1802 if (!v) {pvA = 0; pvB = 0;} 1803 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1804 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1805 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1806 nztot = nzA + nzB; 1807 1808 cmap = mat->garray; 1809 if (v || idx) { 1810 if (nztot) { 1811 /* Sort by increasing column numbers, assuming A and B already sorted */ 1812 PetscInt imark = -1; 1813 if (v) { 1814 *v = v_p = mat->rowvalues; 1815 for (i=0; i<nzB; i++) { 1816 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1817 else break; 1818 } 1819 imark = i; 1820 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1821 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1822 } 1823 if (idx) { 1824 *idx = idx_p = mat->rowindices; 1825 if (imark > -1) { 1826 for (i=0; i<imark; i++) { 1827 idx_p[i] = cmap[cworkB[i]]; 1828 } 1829 } else { 1830 for (i=0; i<nzB; i++) { 1831 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1832 else break; 1833 } 1834 imark = i; 1835 } 1836 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1837 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1838 } 1839 } else { 1840 if (idx) *idx = 0; 1841 if (v) *v = 0; 1842 } 1843 } 1844 *nz = nztot; 1845 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1846 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1847 PetscFunctionReturn(0); 1848 } 1849 1850 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1851 { 1852 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1853 1854 PetscFunctionBegin; 1855 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1856 aij->getrowactive = PETSC_FALSE; 1857 PetscFunctionReturn(0); 1858 } 1859 1860 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1861 { 1862 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1863 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1864 PetscErrorCode ierr; 1865 PetscInt i,j,cstart = mat->cmap->rstart; 1866 PetscReal sum = 0.0; 1867 MatScalar *v; 1868 1869 
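  /*
     The parallel norms are assembled from the local diagonal (A) and off-diagonal (B) blocks:
       NORM_FROBENIUS:  ||M||_F   = sqrt( sum_ij |M_ij|^2 )   -- local sums of squares, MPI_SUM, then sqrt
       NORM_1:          ||M||_1   = max_j sum_i |M_ij|        -- per-column sums reduced with MPI_SUM, then max
       NORM_INFINITY:   ||M||_inf = max_i sum_j |M_ij|        -- per-row sums, reduced with MPI_MAX
  */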
PetscFunctionBegin; 1870 if (aij->size == 1) { 1871 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1872 } else { 1873 if (type == NORM_FROBENIUS) { 1874 v = amat->a; 1875 for (i=0; i<amat->nz; i++) { 1876 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1877 } 1878 v = bmat->a; 1879 for (i=0; i<bmat->nz; i++) { 1880 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1881 } 1882 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1883 *norm = PetscSqrtReal(*norm); 1884 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1885 } else if (type == NORM_1) { /* max column norm */ 1886 PetscReal *tmp,*tmp2; 1887 PetscInt *jj,*garray = aij->garray; 1888 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1889 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1890 *norm = 0.0; 1891 v = amat->a; jj = amat->j; 1892 for (j=0; j<amat->nz; j++) { 1893 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1894 } 1895 v = bmat->a; jj = bmat->j; 1896 for (j=0; j<bmat->nz; j++) { 1897 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1898 } 1899 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1900 for (j=0; j<mat->cmap->N; j++) { 1901 if (tmp2[j] > *norm) *norm = tmp2[j]; 1902 } 1903 ierr = PetscFree(tmp);CHKERRQ(ierr); 1904 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1905 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1906 } else if (type == NORM_INFINITY) { /* max row norm */ 1907 PetscReal ntemp = 0.0; 1908 for (j=0; j<aij->A->rmap->n; j++) { 1909 v = amat->a + amat->i[j]; 1910 sum = 0.0; 1911 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1912 sum += PetscAbsScalar(*v); v++; 1913 } 1914 v = bmat->a + bmat->i[j]; 1915 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1916 sum += PetscAbsScalar(*v); v++; 1917 } 1918 if (sum > ntemp) ntemp = sum; 1919 } 1920 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1921 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1922 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1923 } 1924 PetscFunctionReturn(0); 1925 } 1926 1927 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1928 { 1929 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1930 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1931 PetscErrorCode ierr; 1932 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1933 PetscInt cstart = A->cmap->rstart,ncol; 1934 Mat B; 1935 MatScalar *array; 1936 1937 PetscFunctionBegin; 1938 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1939 ai = Aloc->i; aj = Aloc->j; 1940 bi = Bloc->i; bj = Bloc->j; 1941 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1942 PetscInt *d_nnz,*g_nnz,*o_nnz; 1943 PetscSFNode *oloc; 1944 PETSC_UNUSED PetscSF sf; 1945 1946 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1947 /* compute d_nnz for preallocation */ 1948 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1949 for (i=0; i<ai[ma]; i++) { 1950 d_nnz[aj[i]]++; 1951 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1952 } 1953 /* compute local off-diagonal contributions */ 1954 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1955 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1956 /* map those to global */ 1957 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1958 
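    /* The star forest maps each ghost column in a->garray to the process owning that global
       column; reducing g_nnz with MPIU_SUM therefore accumulates on the owner the number of
       transposed entries that will arrive from off-process rows, giving the o_nnz
       preallocation for the transpose. */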
ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1959 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1960 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1961 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1962 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1963 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1964 1965 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1966 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1967 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1968 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1969 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1970 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1971 } else { 1972 B = *matout; 1973 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1974 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1975 } 1976 1977 /* copy over the A part */ 1978 array = Aloc->a; 1979 row = A->rmap->rstart; 1980 for (i=0; i<ma; i++) { 1981 ncol = ai[i+1]-ai[i]; 1982 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1983 row++; 1984 array += ncol; aj += ncol; 1985 } 1986 aj = Aloc->j; 1987 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1988 1989 /* copy over the B part */ 1990 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1991 array = Bloc->a; 1992 row = A->rmap->rstart; 1993 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1994 cols_tmp = cols; 1995 for (i=0; i<mb; i++) { 1996 ncol = bi[i+1]-bi[i]; 1997 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1998 row++; 1999 array += ncol; cols_tmp += ncol; 2000 } 2001 ierr = PetscFree(cols);CHKERRQ(ierr); 2002 2003 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2004 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2005 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2006 *matout = B; 2007 } else { 2008 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2009 } 2010 PetscFunctionReturn(0); 2011 } 2012 2013 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2014 { 2015 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2016 Mat a = aij->A,b = aij->B; 2017 PetscErrorCode ierr; 2018 PetscInt s1,s2,s3; 2019 2020 PetscFunctionBegin; 2021 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2022 if (rr) { 2023 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2024 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2025 /* Overlap communication with computation. 
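       The scatter of rr into aij->lvec is started here; the left-scaling of the off-diagonal
       block and the scaling of the diagonal block proceed while it is in flight, and the
       scatter is completed just before the off-diagonal block is right-scaled.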
*/ 2026 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2027 } 2028 if (ll) { 2029 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2030 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2031 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2032 } 2033 /* scale the diagonal block */ 2034 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2035 2036 if (rr) { 2037 /* Do a scatter end and then right scale the off-diagonal block */ 2038 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2039 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2040 } 2041 PetscFunctionReturn(0); 2042 } 2043 2044 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2045 { 2046 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2047 PetscErrorCode ierr; 2048 2049 PetscFunctionBegin; 2050 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2055 { 2056 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2057 Mat a,b,c,d; 2058 PetscBool flg; 2059 PetscErrorCode ierr; 2060 2061 PetscFunctionBegin; 2062 a = matA->A; b = matA->B; 2063 c = matB->A; d = matB->B; 2064 2065 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2066 if (flg) { 2067 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2068 } 2069 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2070 PetscFunctionReturn(0); 2071 } 2072 2073 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2074 { 2075 PetscErrorCode ierr; 2076 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2077 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2078 2079 PetscFunctionBegin; 2080 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2081 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2082 /* because of the column compression in the off-processor part of the matrix a->B, 2083 the number of columns in a->B and b->B may be different, hence we cannot call 2084 the MatCopy() directly on the two parts. If need be, we can provide a more 2085 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2086 then copying the submatrices */ 2087 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2088 } else { 2089 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2090 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2091 } 2092 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2093 PetscFunctionReturn(0); 2094 } 2095 2096 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2097 { 2098 PetscErrorCode ierr; 2099 2100 PetscFunctionBegin; 2101 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 /* 2106 Computes the number of nonzeros per row needed for preallocation when X and Y 2107 have different nonzero structure. 
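   The count for row i is the size of the union of the (sorted) global column lists of X and Y.
   For example, if row i of X has global columns {0, 3, 7} and row i of Y has {3, 5}, the union
   is {0, 3, 5, 7} and nnz[i] = 4.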
2108 */ 2109 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2110 { 2111 PetscInt i,j,k,nzx,nzy; 2112 2113 PetscFunctionBegin; 2114 /* Set the number of nonzeros in the new matrix */ 2115 for (i=0; i<m; i++) { 2116 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2117 nzx = xi[i+1] - xi[i]; 2118 nzy = yi[i+1] - yi[i]; 2119 nnz[i] = 0; 2120 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2121 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2122 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2123 nnz[i]++; 2124 } 2125 for (; k<nzy; k++) nnz[i]++; 2126 } 2127 PetscFunctionReturn(0); 2128 } 2129 2130 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2131 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2132 { 2133 PetscErrorCode ierr; 2134 PetscInt m = Y->rmap->N; 2135 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2136 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2137 2138 PetscFunctionBegin; 2139 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2140 PetscFunctionReturn(0); 2141 } 2142 2143 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2144 { 2145 PetscErrorCode ierr; 2146 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2147 PetscBLASInt bnz,one=1; 2148 Mat_SeqAIJ *x,*y; 2149 2150 PetscFunctionBegin; 2151 if (str == SAME_NONZERO_PATTERN) { 2152 PetscScalar alpha = a; 2153 x = (Mat_SeqAIJ*)xx->A->data; 2154 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2155 y = (Mat_SeqAIJ*)yy->A->data; 2156 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2157 x = (Mat_SeqAIJ*)xx->B->data; 2158 y = (Mat_SeqAIJ*)yy->B->data; 2159 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2160 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2161 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2162 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2163 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2164 } else { 2165 Mat B; 2166 PetscInt *nnz_d,*nnz_o; 2167 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2168 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2169 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2170 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2171 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2172 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2173 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2174 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2175 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2176 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2177 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2178 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2179 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2180 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2181 } 2182 PetscFunctionReturn(0); 2183 } 2184 2185 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2186 2187 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2188 { 2189 #if defined(PETSC_USE_COMPLEX) 2190 PetscErrorCode ierr; 2191 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2192 2193 PetscFunctionBegin; 2194 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2195 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2196 #else 2197 PetscFunctionBegin; 2198 #endif 2199 PetscFunctionReturn(0); 2200 } 2201 2202 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2209 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2210 PetscFunctionReturn(0); 2211 } 2212 2213 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2214 { 2215 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2216 PetscErrorCode ierr; 2217 2218 PetscFunctionBegin; 2219 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2220 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2221 PetscFunctionReturn(0); 2222 } 2223 2224 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2225 { 2226 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2227 PetscErrorCode ierr; 2228 PetscInt i,*idxb = 0; 2229 PetscScalar *va,*vb; 2230 Vec vtmp; 2231 2232 PetscFunctionBegin; 2233 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2234 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2235 if (idx) { 2236 for (i=0; i<A->rmap->n; i++) { 2237 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2238 } 2239 } 2240 2241 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2242 if (idx) { 2243 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2244 } 2245 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2246 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2247 2248 for (i=0; i<A->rmap->n; i++) { 2249 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2250 va[i] = vb[i]; 2251 if (idx) idx[i] = a->garray[idxb[i]]; 2252 } 2253 } 2254 2255 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2256 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2257 ierr = PetscFree(idxb);CHKERRQ(ierr); 2258 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2259 PetscFunctionReturn(0); 2260 } 2261 2262 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2263 { 2264 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2265 PetscErrorCode ierr; 2266 PetscInt i,*idxb = 0; 2267 PetscScalar *va,*vb; 2268 Vec vtmp; 2269 2270 PetscFunctionBegin; 2271 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2272 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2273 if (idx) { 2274 for (i=0; i<A->cmap->n; i++) { 2275 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2276 } 2277 } 2278 2279 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2280 if (idx) { 2281 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2282 } 2283 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2284 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2285 2286 for (i=0; i<A->rmap->n; i++) { 2287 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2288 va[i] = vb[i]; 2289 if (idx) idx[i] = a->garray[idxb[i]]; 2290 } 2291 } 2292 2293 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2294 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2295 ierr = PetscFree(idxb);CHKERRQ(ierr); 2296 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2297 PetscFunctionReturn(0); 2298 } 2299 2300 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2301 { 2302 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2303 PetscInt n = A->rmap->n; 2304 PetscInt cstart = A->cmap->rstart; 2305 PetscInt *cmap = mat->garray; 2306 PetscInt *diagIdx, *offdiagIdx; 2307 Vec diagV, offdiagV; 2308 PetscScalar *a, *diagA, *offdiagA; 2309 PetscInt r; 2310 PetscErrorCode ierr; 2311 2312 PetscFunctionBegin; 2313 
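  /* Compute the per-row result of the diagonal (A) and off-diagonal (B) blocks separately and
     then merge row by row: an index selected from A is offset by the first owned column cstart,
     while an index selected from B is translated through garray back to a global column. */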
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  /* sequential work vectors: create them on PETSC_COMM_SELF (as in MatGetRowMax_MPIAIJ below),
     not on the parallel communicator of A */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
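  /* delegate to the sequential diagonal and off-diagonal blocks, then re-assemble the parallel matrix */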
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2408 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2409 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2410 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2411 PetscFunctionReturn(0); 2412 } 2413 2414 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2415 { 2416 PetscFunctionBegin; 2417 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2418 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2419 PetscFunctionReturn(0); 2420 } 2421 2422 /*@ 2423 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2424 2425 Collective on Mat 2426 2427 Input Parameters: 2428 + A - the matrix 2429 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2430 2431 Level: advanced 2432 2433 @*/ 2434 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2435 { 2436 PetscErrorCode ierr; 2437 2438 PetscFunctionBegin; 2439 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2444 { 2445 PetscErrorCode ierr; 2446 PetscBool sc = PETSC_FALSE,flg; 2447 2448 PetscFunctionBegin; 2449 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2450 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 2451 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2452 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2453 if (flg) { 2454 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2455 } 2456 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2457 PetscFunctionReturn(0); 2458 } 2459 2460 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2461 { 2462 PetscErrorCode ierr; 2463 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2464 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2465 2466 PetscFunctionBegin; 2467 if (!Y->preallocated) { 2468 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2469 } else if (!aij->nz) { 2470 PetscInt nonew = aij->nonew; 2471 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2472 aij->nonew = nonew; 2473 } 2474 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2475 PetscFunctionReturn(0); 2476 } 2477 2478 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2479 { 2480 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2481 PetscErrorCode ierr; 2482 2483 PetscFunctionBegin; 2484 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2485 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2486 if (d) { 2487 PetscInt rstart; 2488 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2489 *d += rstart; 2490 2491 } 2492 PetscFunctionReturn(0); 2493 } 2494 2495 2496 /* -------------------------------------------------------------------*/ 2497 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2498 MatGetRow_MPIAIJ, 2499 MatRestoreRow_MPIAIJ, 2500 MatMult_MPIAIJ, 2501 /* 4*/ MatMultAdd_MPIAIJ, 2502 MatMultTranspose_MPIAIJ, 2503 MatMultTransposeAdd_MPIAIJ, 2504 0, 2505 0, 2506 0, 2507 /*10*/ 0, 2508 0, 2509 0, 2510 MatSOR_MPIAIJ, 2511 MatTranspose_MPIAIJ, 2512 /*15*/ MatGetInfo_MPIAIJ, 2513 MatEqual_MPIAIJ, 2514 
MatGetDiagonal_MPIAIJ, 2515 MatDiagonalScale_MPIAIJ, 2516 MatNorm_MPIAIJ, 2517 /*20*/ MatAssemblyBegin_MPIAIJ, 2518 MatAssemblyEnd_MPIAIJ, 2519 MatSetOption_MPIAIJ, 2520 MatZeroEntries_MPIAIJ, 2521 /*24*/ MatZeroRows_MPIAIJ, 2522 0, 2523 0, 2524 0, 2525 0, 2526 /*29*/ MatSetUp_MPIAIJ, 2527 0, 2528 0, 2529 MatGetDiagonalBlock_MPIAIJ, 2530 0, 2531 /*34*/ MatDuplicate_MPIAIJ, 2532 0, 2533 0, 2534 0, 2535 0, 2536 /*39*/ MatAXPY_MPIAIJ, 2537 MatCreateSubMatrices_MPIAIJ, 2538 MatIncreaseOverlap_MPIAIJ, 2539 MatGetValues_MPIAIJ, 2540 MatCopy_MPIAIJ, 2541 /*44*/ MatGetRowMax_MPIAIJ, 2542 MatScale_MPIAIJ, 2543 MatShift_MPIAIJ, 2544 MatDiagonalSet_MPIAIJ, 2545 MatZeroRowsColumns_MPIAIJ, 2546 /*49*/ MatSetRandom_MPIAIJ, 2547 0, 2548 0, 2549 0, 2550 0, 2551 /*54*/ MatFDColoringCreate_MPIXAIJ, 2552 0, 2553 MatSetUnfactored_MPIAIJ, 2554 MatPermute_MPIAIJ, 2555 0, 2556 /*59*/ MatCreateSubMatrix_MPIAIJ, 2557 MatDestroy_MPIAIJ, 2558 MatView_MPIAIJ, 2559 0, 2560 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2561 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2562 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2563 0, 2564 0, 2565 0, 2566 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2567 MatGetRowMinAbs_MPIAIJ, 2568 0, 2569 0, 2570 0, 2571 0, 2572 /*75*/ MatFDColoringApply_AIJ, 2573 MatSetFromOptions_MPIAIJ, 2574 0, 2575 0, 2576 MatFindZeroDiagonals_MPIAIJ, 2577 /*80*/ 0, 2578 0, 2579 0, 2580 /*83*/ MatLoad_MPIAIJ, 2581 MatIsSymmetric_MPIAIJ, 2582 0, 2583 0, 2584 0, 2585 0, 2586 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2587 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2588 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2589 MatPtAP_MPIAIJ_MPIAIJ, 2590 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2591 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2592 0, 2593 0, 2594 0, 2595 0, 2596 /*99*/ 0, 2597 0, 2598 0, 2599 MatConjugate_MPIAIJ, 2600 0, 2601 /*104*/MatSetValuesRow_MPIAIJ, 2602 MatRealPart_MPIAIJ, 2603 MatImaginaryPart_MPIAIJ, 2604 0, 2605 0, 2606 /*109*/0, 2607 0, 2608 MatGetRowMin_MPIAIJ, 2609 0, 2610 MatMissingDiagonal_MPIAIJ, 2611 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2612 0, 2613 MatGetGhosts_MPIAIJ, 2614 0, 2615 0, 2616 /*119*/0, 2617 0, 2618 0, 2619 0, 2620 MatGetMultiProcBlock_MPIAIJ, 2621 /*124*/MatFindNonzeroRows_MPIAIJ, 2622 MatGetColumnNorms_MPIAIJ, 2623 MatInvertBlockDiagonal_MPIAIJ, 2624 0, 2625 MatCreateSubMatricesMPI_MPIAIJ, 2626 /*129*/0, 2627 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2628 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2629 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2630 0, 2631 /*134*/0, 2632 0, 2633 MatRARt_MPIAIJ_MPIAIJ, 2634 0, 2635 0, 2636 /*139*/MatSetBlockSizes_MPIAIJ, 2637 0, 2638 0, 2639 MatFDColoringSetUp_MPIXAIJ, 2640 MatFindOffBlockDiagonalEntries_MPIAIJ, 2641 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2642 }; 2643 2644 /* ----------------------------------------------------------------------------------------*/ 2645 2646 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2647 { 2648 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2649 PetscErrorCode ierr; 2650 2651 PetscFunctionBegin; 2652 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2653 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2654 PetscFunctionReturn(0); 2655 } 2656 2657 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2658 { 2659 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2660 PetscErrorCode ierr; 2661 2662 PetscFunctionBegin; 2663 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2664 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2665 PetscFunctionReturn(0); 2666 } 2667 2668 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2669 { 2670 Mat_MPIAIJ *b; 2671 PetscErrorCode ierr; 2672 2673 PetscFunctionBegin; 2674 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2675 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2676 b = (Mat_MPIAIJ*)B->data; 2677 2678 #if defined(PETSC_USE_CTABLE) 2679 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2680 #else 2681 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2682 #endif 2683 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2684 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2685 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2686 2687 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2688 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2689 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2690 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2691 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2692 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2693 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2694 2695 if (!B->preallocated) { 2696 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2697 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2698 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2699 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2700 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2701 } 2702 2703 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2704 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2705 B->preallocated = PETSC_TRUE; 2706 B->was_assembled = PETSC_FALSE; 2707 B->assembled = PETSC_FALSE;; 2708 PetscFunctionReturn(0); 2709 } 2710 2711 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2712 { 2713 Mat_MPIAIJ *b; 2714 PetscErrorCode ierr; 2715 2716 PetscFunctionBegin; 2717 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2718 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2719 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2720 b = (Mat_MPIAIJ*)B->data; 2721 2722 #if defined(PETSC_USE_CTABLE) 2723 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2724 #else 2725 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2726 #endif 2727 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2728 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2729 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2730 2731 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2732 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2733 B->preallocated = PETSC_TRUE; 2734 B->was_assembled = PETSC_FALSE; 2735 B->assembled = PETSC_FALSE; 2736 PetscFunctionReturn(0); 2737 } 2738 2739 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2740 { 2741 Mat mat; 2742 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2743 PetscErrorCode ierr; 2744 2745 PetscFunctionBegin; 2746 *newmat = 0; 2747 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2748 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2749 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2750 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2751 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2752 a = (Mat_MPIAIJ*)mat->data; 2753 2754 mat->factortype = matin->factortype; 2755 mat->assembled = PETSC_TRUE; 2756 mat->insertmode = NOT_SET_VALUES; 2757 mat->preallocated = PETSC_TRUE; 2758 2759 a->size = oldmat->size; 2760 a->rank = oldmat->rank; 2761 a->donotstash 
= oldmat->donotstash; 2762 a->roworiented = oldmat->roworiented; 2763 a->rowindices = 0; 2764 a->rowvalues = 0; 2765 a->getrowactive = PETSC_FALSE; 2766 2767 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2768 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2769 2770 if (oldmat->colmap) { 2771 #if defined(PETSC_USE_CTABLE) 2772 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2773 #else 2774 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2775 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2776 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2777 #endif 2778 } else a->colmap = 0; 2779 if (oldmat->garray) { 2780 PetscInt len; 2781 len = oldmat->B->cmap->n; 2782 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2783 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2784 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2785 } else a->garray = 0; 2786 2787 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2788 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2789 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2791 2792 if (oldmat->Mvctx_mpi1) { 2793 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2794 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2795 } 2796 2797 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2798 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2799 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2800 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2801 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2802 *newmat = mat; 2803 PetscFunctionReturn(0); 2804 } 2805 2806 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2807 { 2808 PetscScalar *vals,*svals; 2809 MPI_Comm comm; 2810 PetscErrorCode ierr; 2811 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2812 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2813 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2814 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2815 PetscInt cend,cstart,n,*rowners; 2816 int fd; 2817 PetscInt bs = newMat->rmap->bs; 2818 2819 PetscFunctionBegin; 2820 /* force binary viewer to load .info file if it has not yet done so */ 2821 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2822 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2823 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2824 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2825 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2826 if (!rank) { 2827 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2828 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2829 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2830 } 2831 2832 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2833 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2834 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2835 if (bs < 0) bs = 1; 2836 2837 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2838 M = header[1]; N = header[2]; 2839 2840 /* If global sizes are set, check if they are consistent with that given in the file */ 2841 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2842 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2843 2844 /* determine ownership of all (block) rows */ 2845 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2846 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2847 else m = newMat->rmap->n; /* Set by user */ 2848 2849 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2850 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2851 2852 /* First process needs enough room for process with most rows */ 2853 if (!rank) { 2854 mmax = rowners[1]; 2855 for (i=2; i<=size; i++) { 2856 mmax = PetscMax(mmax, rowners[i]); 2857 } 2858 } else mmax = -1; /* unused, but compilers complain */ 2859 2860 rowners[0] = 0; 2861 for (i=2; i<=size; i++) { 2862 rowners[i] += rowners[i-1]; 2863 } 2864 rstart = rowners[rank]; 2865 rend = rowners[rank+1]; 2866 2867 /* distribute row lengths to all processors */ 2868 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2869 if (!rank) { 2870 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2871 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2872 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2873 for (j=0; j<m; j++) { 2874 procsnz[0] += ourlens[j]; 2875 } 2876 for (i=1; i<size; i++) { 2877 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2878 /* calculate the number of nonzeros on each processor */ 2879 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2880 procsnz[i] += rowlengths[j]; 2881 } 2882 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2883 } 2884 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2885 } else { 2886 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2887 } 2888 2889 if (!rank) { 2890 /* determine max buffer needed and allocate it */ 2891 maxnz = 0; 2892 for (i=0; i<size; i++) { 2893 maxnz = PetscMax(maxnz,procsnz[i]); 2894 } 2895 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2896 2897 /* read in my part of the matrix column indices */ 2898 nz = procsnz[0]; 2899 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2900 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2901 2902 /* read in every one elses and ship off */ 2903 for (i=1; i<size; i++) { 2904 nz = procsnz[i]; 2905 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2906 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2907 } 2908 ierr = PetscFree(cols);CHKERRQ(ierr); 2909 } else { 2910 /* determine buffer space needed for message */ 2911 nz = 0; 2912 for (i=0; i<m; i++) { 2913 nz += ourlens[i]; 2914 } 2915 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2916 2917 /* receive message of column indices*/ 2918 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2919 } 2920 2921 /* 
determine column ownership if matrix is not square */ 2922 if (N != M) { 2923 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2924 else n = newMat->cmap->n; 2925 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2926 cstart = cend - n; 2927 } else { 2928 cstart = rstart; 2929 cend = rend; 2930 n = cend - cstart; 2931 } 2932 2933 /* loop over local rows, determining number of off diagonal entries */ 2934 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2935 jj = 0; 2936 for (i=0; i<m; i++) { 2937 for (j=0; j<ourlens[i]; j++) { 2938 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2939 jj++; 2940 } 2941 } 2942 2943 for (i=0; i<m; i++) { 2944 ourlens[i] -= offlens[i]; 2945 } 2946 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2947 2948 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2949 2950 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2951 2952 for (i=0; i<m; i++) { 2953 ourlens[i] += offlens[i]; 2954 } 2955 2956 if (!rank) { 2957 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2958 2959 /* read in my part of the matrix numerical values */ 2960 nz = procsnz[0]; 2961 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2962 2963 /* insert into matrix */ 2964 jj = rstart; 2965 smycols = mycols; 2966 svals = vals; 2967 for (i=0; i<m; i++) { 2968 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2969 smycols += ourlens[i]; 2970 svals += ourlens[i]; 2971 jj++; 2972 } 2973 2974 /* read in other processors and ship out */ 2975 for (i=1; i<size; i++) { 2976 nz = procsnz[i]; 2977 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2978 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2979 } 2980 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2981 } else { 2982 /* receive numeric values */ 2983 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2984 2985 /* receive message of values*/ 2986 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2987 2988 /* insert into matrix */ 2989 jj = rstart; 2990 smycols = mycols; 2991 svals = vals; 2992 for (i=0; i<m; i++) { 2993 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2994 smycols += ourlens[i]; 2995 svals += ourlens[i]; 2996 jj++; 2997 } 2998 } 2999 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3000 ierr = PetscFree(vals);CHKERRQ(ierr); 3001 ierr = PetscFree(mycols);CHKERRQ(ierr); 3002 ierr = PetscFree(rowners);CHKERRQ(ierr); 3003 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3004 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3005 PetscFunctionReturn(0); 3006 } 3007 3008 /* Not scalable because of ISAllGather() unless getting all columns. 
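   (every process gathers a copy of the entire column index set, so the memory use grows with the
   global number of selected columns)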
*/ 3009 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3010 { 3011 PetscErrorCode ierr; 3012 IS iscol_local; 3013 PetscBool isstride; 3014 PetscMPIInt lisstride=0,gisstride; 3015 3016 PetscFunctionBegin; 3017 /* check if we are grabbing all columns*/ 3018 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3019 3020 if (isstride) { 3021 PetscInt start,len,mstart,mlen; 3022 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3023 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3024 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3025 if (mstart == start && mlen-mstart == len) lisstride = 1; 3026 } 3027 3028 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3029 if (gisstride) { 3030 PetscInt N; 3031 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3032 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3033 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3034 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3035 } else { 3036 PetscInt cbs; 3037 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3038 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3039 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3040 } 3041 3042 *isseq = iscol_local; 3043 PetscFunctionReturn(0); 3044 } 3045 3046 /* 3047 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3048 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3049 3050 Input Parameters: 3051 mat - matrix 3052 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3053 i.e., mat->rstart <= isrow[i] < mat->rend 3054 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3055 i.e., mat->cstart <= iscol[i] < mat->cend 3056 Output Parameter: 3057 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3058 iscol_o - sequential column index set for retrieving mat->B 3059 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3060 */ 3061 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3062 { 3063 PetscErrorCode ierr; 3064 Vec x,cmap; 3065 const PetscInt *is_idx; 3066 PetscScalar *xarray,*cmaparray; 3067 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3068 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3069 Mat B=a->B; 3070 Vec lvec=a->lvec,lcmap; 3071 PetscInt i,cstart,cend,Bn=B->cmap->N; 3072 MPI_Comm comm; 3073 VecScatter Mvctx=a->Mvctx; 3074 3075 PetscFunctionBegin; 3076 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3077 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3078 3079 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3080 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3081 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3082 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3083 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3084 3085 /* Get start indices */ 3086 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3087 isstart -= ncols; 3088 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3089 3090 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3091 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3092 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3093 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3094 for (i=0; i<ncols; i++) { 3095 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3096 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3097 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3098 } 3099 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3100 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3101 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3102 3103 /* Get iscol_d */ 3104 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3105 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3106 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3107 3108 /* Get isrow_d */ 3109 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3110 rstart = mat->rmap->rstart; 3111 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3112 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3113 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3114 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3115 3116 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3117 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3118 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3119 3120 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3121 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3122 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3123 3124 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3125 3126 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3127 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3128 3129 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3130 /* off-process column indices */ 3131 count = 0; 3132 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3133 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3134 3135 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3136 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3137 for (i=0; i<Bn; i++) { 3138 if (PetscRealPart(xarray[i]) > -1.0) { 3139 idx[count] = i; /* local column index in off-diagonal part B */ 3140 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3141 count++; 3142 } 3143 } 3144 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3145 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3146 3147 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3148 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3149 3150 ierr = PetscFree(idx);CHKERRQ(ierr); 3151 *garray = cmap1; 3152 3153 ierr = VecDestroy(&x);CHKERRQ(ierr); 3154 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3155 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3156 PetscFunctionReturn(0); 3157 } 3158 3159 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3160 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3161 { 3162 PetscErrorCode ierr; 3163 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3164 Mat M = NULL; 3165 MPI_Comm comm; 3166 IS iscol_d,isrow_d,iscol_o; 3167 Mat Asub = NULL,Bsub = NULL; 3168 PetscInt n; 3169 3170 PetscFunctionBegin; 3171 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3172 3173 if (call == MAT_REUSE_MATRIX) { 3174 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3175 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3176 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3177 3178 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3179 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3180 3181 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3182 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3183 3184 /* Update diagonal and off-diagonal portions of submat */ 3185 asub = (Mat_MPIAIJ*)(*submat)->data; 3186 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3187 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3188 if (n) { 3189 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3190 } 3191 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3192 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3193 3194 } else { /* call == MAT_INITIAL_MATRIX) */ 3195 const PetscInt *garray; 3196 PetscInt BsubN; 3197 3198 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3199 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3200 3201 /* Create local submatrices Asub and Bsub */ 3202 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3203 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3204 3205 /* Create submatrix M */ 3206 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3207 3208 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3209 asub = (Mat_MPIAIJ*)M->data; 3210 3211 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3212 n = asub->B->cmap->N; 3213 if (BsubN > n) { 3214 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3215 const PetscInt *idx; 3216 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3217 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3218 3219 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3220 j = 0; 3221 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3222 for (i=0; i<n; i++) { 3223 if (j >= BsubN) break; 3224 while (subgarray[i] > garray[j]) j++; 3225 3226 if (subgarray[i] == garray[j]) { 3227 idx_new[i] = idx[j++]; 3228 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3229 } 3230 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3231 3232 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3233 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3234 3235 } else if (BsubN < n) { 3236 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3237 } 3238 3239 ierr = PetscFree(garray);CHKERRQ(ierr); 3240 *submat = M; 3241 3242 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3243 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3244 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3245 3246 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3247 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3248 3249 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3250 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3251 } 3252 PetscFunctionReturn(0); 3253 } 3254 3255 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3256 { 3257 PetscErrorCode ierr; 3258 IS iscol_local=NULL,isrow_d; 3259 PetscInt csize; 3260 PetscInt n,i,j,start,end; 3261 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3262 MPI_Comm comm; 3263 3264 PetscFunctionBegin; 3265 /* If isrow has same processor distribution as mat, 3266 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3267 if (call == MAT_REUSE_MATRIX) { 3268 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3269 if (isrow_d) { 3270 sameRowDist = PETSC_TRUE; 3271 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3272 } else { 3273 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3274 if (iscol_local) { 3275 sameRowDist = PETSC_TRUE; 3276 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3277 } 3278 } 3279 } else { 3280 /* Check if isrow has same processor distribution as mat */ 3281 sameDist[0] 
= PETSC_FALSE; 3282 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3283 if (!n) { 3284 sameDist[0] = PETSC_TRUE; 3285 } else { 3286 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3287 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3288 if (i >= start && j < end) { 3289 sameDist[0] = PETSC_TRUE; 3290 } 3291 } 3292 3293 /* Check if iscol has same processor distribution as mat */ 3294 sameDist[1] = PETSC_FALSE; 3295 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3296 if (!n) { 3297 sameDist[1] = PETSC_TRUE; 3298 } else { 3299 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3300 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3301 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3302 } 3303 3304 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3305 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3306 sameRowDist = tsameDist[0]; 3307 } 3308 3309 if (sameRowDist) { 3310 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3311 /* isrow and iscol have same processor distribution as mat */ 3312 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3313 PetscFunctionReturn(0); 3314 } else { /* sameRowDist */ 3315 /* isrow has same processor distribution as mat */ 3316 if (call == MAT_INITIAL_MATRIX) { 3317 PetscBool sorted; 3318 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3319 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3320 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3321 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3322 3323 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3324 if (sorted) { 3325 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3326 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329 } else { /* call == MAT_REUSE_MATRIX */ 3330 IS iscol_sub; 3331 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3332 if (iscol_sub) { 3333 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3334 PetscFunctionReturn(0); 3335 } 3336 } 3337 } 3338 } 3339 3340 /* General case: iscol -> iscol_local which has global size of iscol */ 3341 if (call == MAT_REUSE_MATRIX) { 3342 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3343 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3344 } else { 3345 if (!iscol_local) { 3346 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3347 } 3348 } 3349 3350 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3351 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3352 3353 if (call == MAT_INITIAL_MATRIX) { 3354 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3355 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3356 } 3357 PetscFunctionReturn(0); 3358 } 3359 3360 /*@C 3361 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3362 and "off-diagonal" part of the matrix in CSR format. 3363 3364 Collective on MPI_Comm 3365 3366 Input Parameters: 3367 + comm - MPI communicator 3368 . 
A - "diagonal" portion of matrix 3369 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3370 - garray - global index of B columns 3371 3372 Output Parameter: 3373 . mat - the matrix, with input A as its local diagonal matrix 3374 Level: advanced 3375 3376 Notes: 3377 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3378 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3379 3380 .seealso: MatCreateMPIAIJWithSplitArrays() 3381 @*/ 3382 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3383 { 3384 PetscErrorCode ierr; 3385 Mat_MPIAIJ *maij; 3386 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3387 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3388 PetscScalar *oa=b->a; 3389 Mat Bnew; 3390 PetscInt m,n,N; 3391 3392 PetscFunctionBegin; 3393 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3394 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3395 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3396 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3397 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3398 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3399 3400 /* Get global columns of mat */ 3401 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3402 3403 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3404 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3405 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3406 maij = (Mat_MPIAIJ*)(*mat)->data; 3407 3408 (*mat)->preallocated = PETSC_TRUE; 3409 3410 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3411 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3412 3413 /* Set A as diagonal portion of *mat */ 3414 maij->A = A; 3415 3416 nz = oi[m]; 3417 for (i=0; i<nz; i++) { 3418 col = oj[i]; 3419 oj[i] = garray[col]; 3420 } 3421 3422 /* Set Bnew as off-diagonal portion of *mat */ 3423 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3424 bnew = (Mat_SeqAIJ*)Bnew->data; 3425 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3426 maij->B = Bnew; 3427 3428 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3429 3430 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3431 b->free_a = PETSC_FALSE; 3432 b->free_ij = PETSC_FALSE; 3433 ierr = MatDestroy(&B);CHKERRQ(ierr); 3434 3435 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3436 bnew->free_a = PETSC_TRUE; 3437 bnew->free_ij = PETSC_TRUE; 3438 3439 /* condense columns of maij->B */ 3440 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3441 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3442 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3443 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3444 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3445 PetscFunctionReturn(0); 3446 } 3447 3448 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3449 
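/*
   Illustrative usage sketch (not used by the library): a typical call to MatCreateMPIAIJWithSeqAIJ(),
   mirroring how it is driven from MatCreateSubMatrix_MPIAIJ_SameRowColDist() above. Here Asub, Bsub
   and garray are hypothetical names for a sequential diagonal block, a sequential off-diagonal block
   with compressed columns, and the global column indices of Bsub's columns:

     Mat C;
     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&C);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   Afterwards Asub is owned by C and Bsub has been destroyed, so neither may be used by the caller again.
*/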
3450 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3451 { 3452 PetscErrorCode ierr; 3453 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3454 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3455 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3456 Mat M,Msub,B=a->B; 3457 MatScalar *aa; 3458 Mat_SeqAIJ *aij; 3459 PetscInt *garray = a->garray,*colsub,Ncols; 3460 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3461 IS iscol_sub,iscmap; 3462 const PetscInt *is_idx,*cmap; 3463 PetscBool allcolumns=PETSC_FALSE; 3464 MPI_Comm comm; 3465 3466 PetscFunctionBegin; 3467 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3468 3469 if (call == MAT_REUSE_MATRIX) { 3470 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3471 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3472 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3473 3474 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3475 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3476 3477 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3478 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3479 3480 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3481 3482 } else { /* call == MAT_INITIAL_MATRIX) */ 3483 PetscBool flg; 3484 3485 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3486 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3487 3488 /* (1) iscol -> nonscalable iscol_local */ 3489 /* Check for special case: each processor gets entire matrix columns */ 3490 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3491 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3492 if (allcolumns) { 3493 iscol_sub = iscol_local; 3494 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3495 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3496 3497 } else { 3498 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3499 PetscInt *idx,*cmap1,k; 3500 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3501 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3502 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3503 count = 0; 3504 k = 0; 3505 for (i=0; i<Ncols; i++) { 3506 j = is_idx[i]; 3507 if (j >= cstart && j < cend) { 3508 /* diagonal part of mat */ 3509 idx[count] = j; 3510 cmap1[count++] = i; /* column index in submat */ 3511 } else if (Bn) { 3512 /* off-diagonal part of mat */ 3513 if (j == garray[k]) { 3514 idx[count] = j; 3515 cmap1[count++] = i; /* column index in submat */ 3516 } else if (j > garray[k]) { 3517 while (j > garray[k] && k < Bn-1) k++; 3518 if (j == garray[k]) { 3519 idx[count] = j; 3520 cmap1[count++] = i; /* column index in submat */ 3521 } 3522 } 3523 } 3524 } 3525 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3526 3527 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3528 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3529 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3530 3531 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3532 } 3533 3534 /* (3) Create sequential Msub */ 3535 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3536 } 3537 3538 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3539 aij = (Mat_SeqAIJ*)(Msub)->data; 3540 ii = aij->i; 3541 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3542 3543 /* 3544 m - number of local rows 3545 Ncols - number of columns (same on all processors) 3546 rstart - first row in new global matrix generated 3547 */ 3548 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3549 3550 if (call == MAT_INITIAL_MATRIX) { 3551 /* (4) Create parallel newmat */ 3552 PetscMPIInt rank,size; 3553 PetscInt csize; 3554 3555 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3556 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3557 3558 /* 3559 Determine the number of non-zeros in the diagonal and off-diagonal 3560 portions of the matrix in order to do correct preallocation 3561 */ 3562 3563 /* first get start and end of "diagonal" columns */ 3564 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3565 if (csize == PETSC_DECIDE) { 3566 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3567 if (mglobal == Ncols) { /* square matrix */ 3568 nlocal = m; 3569 } else { 3570 nlocal = Ncols/size + ((Ncols % size) > rank); 3571 } 3572 } else { 3573 nlocal = csize; 3574 } 3575 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3576 rstart = rend - nlocal; 3577 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3578 3579 /* next, compute all the lengths */ 3580 jj = aij->j; 3581 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3582 olens = dlens + m; 3583 for (i=0; i<m; i++) { 3584 jend = ii[i+1] - ii[i]; 3585 olen = 0; 3586 dlen = 0; 3587 for (j=0; j<jend; j++) { 3588 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3589 else dlen++; 3590 jj++; 3591 } 3592 olens[i] = olen; 3593 dlens[i] = dlen; 3594 } 3595 3596 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3597 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3598 3599 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3600 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
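      /* Preallocate M from the row lengths computed above; dlens and olens share one allocation
         (olens = dlens + m), so the single PetscFree(dlens) below releases both arrays */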
3601 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3602 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3603 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3604 ierr = PetscFree(dlens);CHKERRQ(ierr); 3605 3606 } else { /* call == MAT_REUSE_MATRIX */ 3607 M = *newmat; 3608 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3609 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3610 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3611 /* 3612 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3613 rather than the slower MatSetValues(). 3614 */ 3615 M->was_assembled = PETSC_TRUE; 3616 M->assembled = PETSC_FALSE; 3617 } 3618 3619 /* (5) Set values of Msub to *newmat */ 3620 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3621 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3622 3623 jj = aij->j; 3624 aa = aij->a; 3625 for (i=0; i<m; i++) { 3626 row = rstart + i; 3627 nz = ii[i+1] - ii[i]; 3628 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3629 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3630 jj += nz; aa += nz; 3631 } 3632 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3633 3634 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3635 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3636 3637 ierr = PetscFree(colsub);CHKERRQ(ierr); 3638 3639 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3640 if (call == MAT_INITIAL_MATRIX) { 3641 *newmat = M; 3642 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3643 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3644 3645 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3646 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3647 3648 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3649 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3650 3651 if (iscol_local) { 3652 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3653 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3654 } 3655 } 3656 PetscFunctionReturn(0); 3657 } 3658 3659 /* 3660 Not great since it makes two copies of the submatrix, first an SeqAIJ 3661 in local and then by concatenating the local matrices the end result. 3662 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3663 3664 Note: This requires a sequential iscol with all indices. 
3665 */ 3666 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3667 { 3668 PetscErrorCode ierr; 3669 PetscMPIInt rank,size; 3670 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3671 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3672 Mat M,Mreuse; 3673 MatScalar *aa,*vwork; 3674 MPI_Comm comm; 3675 Mat_SeqAIJ *aij; 3676 PetscBool colflag,allcolumns=PETSC_FALSE; 3677 3678 PetscFunctionBegin; 3679 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3680 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3681 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3682 3683 /* Check for special case: each processor gets entire matrix columns */ 3684 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3685 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3686 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3687 3688 if (call == MAT_REUSE_MATRIX) { 3689 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3690 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3691 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3692 } else { 3693 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3694 } 3695 3696 /* 3697 m - number of local rows 3698 n - number of columns (same on all processors) 3699 rstart - first row in new global matrix generated 3700 */ 3701 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3702 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3703 if (call == MAT_INITIAL_MATRIX) { 3704 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3705 ii = aij->i; 3706 jj = aij->j; 3707 3708 /* 3709 Determine the number of non-zeros in the diagonal and off-diagonal 3710 portions of the matrix in order to do correct preallocation 3711 */ 3712 3713 /* first get start and end of "diagonal" columns */ 3714 if (csize == PETSC_DECIDE) { 3715 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3716 if (mglobal == n) { /* square matrix */ 3717 nlocal = m; 3718 } else { 3719 nlocal = n/size + ((n % size) > rank); 3720 } 3721 } else { 3722 nlocal = csize; 3723 } 3724 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3725 rstart = rend - nlocal; 3726 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3727 3728 /* next, compute all the lengths */ 3729 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3730 olens = dlens + m; 3731 for (i=0; i<m; i++) { 3732 jend = ii[i+1] - ii[i]; 3733 olen = 0; 3734 dlen = 0; 3735 for (j=0; j<jend; j++) { 3736 if (*jj < rstart || *jj >= rend) olen++; 3737 else dlen++; 3738 jj++; 3739 } 3740 olens[i] = olen; 3741 dlens[i] = dlen; 3742 } 3743 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3744 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3745 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3746 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3747 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3748 ierr = PetscFree(dlens);CHKERRQ(ierr); 3749 } else { 3750 PetscInt ml,nl; 3751 3752 M = *newmat; 3753 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3754 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3755 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3756 /* 3757 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3758 rather than the slower MatSetValues(). 3759 */ 3760 M->was_assembled = PETSC_TRUE; 3761 M->assembled = PETSC_FALSE; 3762 } 3763 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3764 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3765 ii = aij->i; 3766 jj = aij->j; 3767 aa = aij->a; 3768 for (i=0; i<m; i++) { 3769 row = rstart + i; 3770 nz = ii[i+1] - ii[i]; 3771 cwork = jj; jj += nz; 3772 vwork = aa; aa += nz; 3773 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3774 } 3775 3776 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3777 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3778 *newmat = M; 3779 3780 /* save submatrix used in processor for next request */ 3781 if (call == MAT_INITIAL_MATRIX) { 3782 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3783 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3784 } 3785 PetscFunctionReturn(0); 3786 } 3787 3788 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3789 { 3790 PetscInt m,cstart, cend,j,nnz,i,d; 3791 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3792 const PetscInt *JJ; 3793 PetscScalar *values; 3794 PetscErrorCode ierr; 3795 PetscBool nooffprocentries; 3796 3797 PetscFunctionBegin; 3798 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3799 3800 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3801 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3802 m = B->rmap->n; 3803 cstart = B->cmap->rstart; 3804 cend = B->cmap->rend; 3805 rstart = B->rmap->rstart; 3806 3807 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3808 3809 #if defined(PETSC_USE_DEBUG) 3810 for (i=0; i<m; i++) { 3811 nnz = Ii[i+1]- Ii[i]; 3812 JJ = J + Ii[i]; 3813 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3814 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3815 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3816 } 3817 #endif 3818 3819 for (i=0; i<m; i++) { 3820 nnz = Ii[i+1]- Ii[i]; 3821 JJ = J + Ii[i]; 3822 nnz_max = PetscMax(nnz_max,nnz); 3823 d = 0; 3824 for (j=0; j<nnz; j++) { 3825 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3826 } 3827 d_nnz[i] = d; 3828 o_nnz[i] = nnz - d; 3829 } 3830 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3831 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3832 3833 if (v) values = (PetscScalar*)v; 3834 else { 3835 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3836 } 3837 3838 for (i=0; i<m; i++) { 3839 ii = i + rstart; 3840 nnz = Ii[i+1]- Ii[i]; 3841 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3842 } 3843 nooffprocentries = B->nooffprocentries; 3844 B->nooffprocentries = PETSC_TRUE; 3845 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3846 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3847 B->nooffprocentries = nooffprocentries; 3848 3849 if (!v) { 3850 ierr = PetscFree(values);CHKERRQ(ierr); 3851 } 3852 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3853 PetscFunctionReturn(0); 3854 } 3855 3856 /*@ 3857 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3858 (the default parallel PETSc format). 3859 3860 Collective on MPI_Comm 3861 3862 Input Parameters: 3863 + B - the matrix 3864 . i - the indices into j for the start of each local row (starts with zero) 3865 . j - the column indices for each local row (starts with zero) 3866 - v - optional values in the matrix 3867 3868 Level: developer 3869 3870 Notes: 3871 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3872 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3873 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3874 3875 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3876 3877 The format which is used for the sparse matrix input, is equivalent to a 3878 row-major ordering.. i.e for the following matrix, the input data expected is 3879 as shown 3880 3881 $ 1 0 0 3882 $ 2 0 3 P0 3883 $ ------- 3884 $ 4 5 6 P1 3885 $ 3886 $ Process0 [P0]: rows_owned=[0,1] 3887 $ i = {0,1,3} [size = nrow+1 = 2+1] 3888 $ j = {0,0,2} [size = 3] 3889 $ v = {1,2,3} [size = 3] 3890 $ 3891 $ Process1 [P1]: rows_owned=[2] 3892 $ i = {0,3} [size = nrow+1 = 1+1] 3893 $ j = {0,1,2} [size = 3] 3894 $ v = {4,5,6} [size = 3] 3895 3896 .keywords: matrix, aij, compressed row, sparse, parallel 3897 3898 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3899 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3900 @*/ 3901 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3902 { 3903 PetscErrorCode ierr; 3904 3905 PetscFunctionBegin; 3906 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3907 PetscFunctionReturn(0); 3908 } 3909 3910 /*@C 3911 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3912 (the default parallel PETSc format). For good matrix assembly performance 3913 the user should preallocate the matrix storage by setting the parameters 3914 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3915 performance can be increased by more than a factor of 50. 3916 3917 Collective on MPI_Comm 3918 3919 Input Parameters: 3920 + B - the matrix 3921 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3922 (same value is used for all local rows) 3923 . d_nnz - array containing the number of nonzeros in the various rows of the 3924 DIAGONAL portion of the local submatrix (possibly different for each row) 3925 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3926 The size of this array is equal to the number of local rows, i.e 'm'. 
3927 For matrices that will be factored, you must leave room for (and set) 3928 the diagonal entry even if it is zero. 3929 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3930 submatrix (same value is used for all local rows). 3931 - o_nnz - array containing the number of nonzeros in the various rows of the 3932 OFF-DIAGONAL portion of the local submatrix (possibly different for 3933 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3934 structure. The size of this array is equal to the number 3935 of local rows, i.e 'm'. 3936 3937 If the *_nnz parameter is given then the *_nz parameter is ignored 3938 3939 The AIJ format (also called the Yale sparse matrix format or 3940 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3941 storage. The stored row and column indices begin with zero. 3942 See Users-Manual: ch_mat for details. 3943 3944 The parallel matrix is partitioned such that the first m0 rows belong to 3945 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3946 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3947 3948 The DIAGONAL portion of the local submatrix of a processor can be defined 3949 as the submatrix which is obtained by extraction the part corresponding to 3950 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3951 first row that belongs to the processor, r2 is the last row belonging to 3952 the this processor, and c1-c2 is range of indices of the local part of a 3953 vector suitable for applying the matrix to. This is an mxn matrix. In the 3954 common case of a square matrix, the row and column ranges are the same and 3955 the DIAGONAL part is also square. The remaining portion of the local 3956 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3957 3958 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3959 3960 You can call MatGetInfo() to get information on how effective the preallocation was; 3961 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3962 You can also run with the option -info and look for messages with the string 3963 malloc in them to see if additional memory allocation was needed. 3964 3965 Example usage: 3966 3967 Consider the following 8x8 matrix with 34 non-zero values, that is 3968 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3969 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3970 as follows: 3971 3972 .vb 3973 1 2 0 | 0 3 0 | 0 4 3974 Proc0 0 5 6 | 7 0 0 | 8 0 3975 9 0 10 | 11 0 0 | 12 0 3976 ------------------------------------- 3977 13 0 14 | 15 16 17 | 0 0 3978 Proc1 0 18 0 | 19 20 21 | 0 0 3979 0 0 0 | 22 23 0 | 24 0 3980 ------------------------------------- 3981 Proc2 25 26 27 | 0 0 28 | 29 0 3982 30 0 0 | 31 32 33 | 0 34 3983 .ve 3984 3985 This can be represented as a collection of submatrices as: 3986 3987 .vb 3988 A B C 3989 D E F 3990 G H I 3991 .ve 3992 3993 Where the submatrices A,B,C are owned by proc0, D,E,F are 3994 owned by proc1, G,H,I are owned by proc2. 3995 3996 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3997 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3998 The 'M','N' parameters are 8,8, and have the same values on all procs. 3999 4000 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4001 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4002 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4081 4082 The format which is used for the sparse matrix input, is equivalent to a 4083 row-major ordering.. i.e for the following matrix, the input data expected is 4084 as shown 4085 4086 $ 1 0 0 4087 $ 2 0 3 P0 4088 $ ------- 4089 $ 4 5 6 P1 4090 $ 4091 $ Process0 [P0]: rows_owned=[0,1] 4092 $ i = {0,1,3} [size = nrow+1 = 2+1] 4093 $ j = {0,0,2} [size = 3] 4094 $ v = {1,2,3} [size = 3] 4095 $ 4096 $ Process1 [P1]: rows_owned=[2] 4097 $ i = {0,3} [size = nrow+1 = 1+1] 4098 $ j = {0,1,2} [size = 3] 4099 $ v = {4,5,6} [size = 3] 4100 4101 .keywords: matrix, aij, compressed row, sparse, parallel 4102 4103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4104 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4105 @*/ 4106 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4107 { 4108 PetscErrorCode ierr; 4109 4110 PetscFunctionBegin; 4111 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4112 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4113 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4114 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4115 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4116 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4117 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4118 PetscFunctionReturn(0); 4119 } 4120 4121 /*@C 4122 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4123 (the default parallel PETSc format). For good matrix assembly performance 4124 the user should preallocate the matrix storage by setting the parameters 4125 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4126 performance can be increased by more than a factor of 50. 4127 4128 Collective on MPI_Comm 4129 4130 Input Parameters: 4131 + comm - MPI communicator 4132 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4133 This value should be the same as the local size used in creating the 4134 y vector for the matrix-vector product y = Ax. 4135 . n - This value should be the same as the local size used in creating the 4136 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4137 calculated if N is given) For square matrices n is almost always m. 4138 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4139 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4140 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4141 (same value is used for all local rows) 4142 . d_nnz - array containing the number of nonzeros in the various rows of the 4143 DIAGONAL portion of the local submatrix (possibly different for each row) 4144 or NULL, if d_nz is used to specify the nonzero structure. 4145 The size of this array is equal to the number of local rows, i.e 'm'. 4146 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4147 submatrix (same value is used for all local rows). 4148 - o_nnz - array containing the number of nonzeros in the various rows of the 4149 OFF-DIAGONAL portion of the local submatrix (possibly different for 4150 each row) or NULL, if o_nz is used to specify the nonzero 4151 structure. 
   The size of this array is equal to the number of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
   This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
   Proc0    0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
   -------------------------------------
           13  0 14  | 15 16 17  |  0  0
   Proc1    0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
   -------------------------------------
   Proc2   25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the pre-allocation is perfect.
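   For the example above, each process could thus create its part of A with a call of the form
   (an illustrative sketch only; m, n, d_nnz and o_nnz hold that process' values listed above):

$     MatCreateAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);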
4290 4291 Level: intermediate 4292 4293 .keywords: matrix, aij, compressed row, sparse, parallel 4294 4295 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4296 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4297 @*/ 4298 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4299 { 4300 PetscErrorCode ierr; 4301 PetscMPIInt size; 4302 4303 PetscFunctionBegin; 4304 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4305 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4306 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4307 if (size > 1) { 4308 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4309 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4310 } else { 4311 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4312 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4313 } 4314 PetscFunctionReturn(0); 4315 } 4316 4317 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4318 { 4319 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4320 PetscBool flg; 4321 PetscErrorCode ierr; 4322 4323 PetscFunctionBegin; 4324 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4325 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4326 if (Ad) *Ad = a->A; 4327 if (Ao) *Ao = a->B; 4328 if (colmap) *colmap = a->garray; 4329 PetscFunctionReturn(0); 4330 } 4331 4332 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4333 { 4334 PetscErrorCode ierr; 4335 PetscInt m,N,i,rstart,nnz,Ii; 4336 PetscInt *indx; 4337 PetscScalar *values; 4338 4339 PetscFunctionBegin; 4340 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4341 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4342 PetscInt *dnz,*onz,sum,bs,cbs; 4343 4344 if (n == PETSC_DECIDE) { 4345 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4346 } 4347 /* Check sum(n) = N */ 4348 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4349 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4350 4351 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4352 rstart -= m; 4353 4354 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4355 for (i=0; i<m; i++) { 4356 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4357 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4358 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4359 } 4360 4361 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4362 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4363 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4364 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4365 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4366 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4367 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4368 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4369 } 4370 4371 /* numeric phase */ 4372 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4373 for (i=0; i<m; i++) { 4374 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4375 Ii = i + rstart; 4376 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4377 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4378 } 4379 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4380 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4381 PetscFunctionReturn(0); 4382 } 4383 4384 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4385 { 4386 PetscErrorCode ierr; 4387 PetscMPIInt rank; 4388 PetscInt m,N,i,rstart,nnz; 4389 size_t len; 4390 const PetscInt *indx; 4391 PetscViewer out; 4392 char *name; 4393 Mat B; 4394 const PetscScalar *values; 4395 4396 PetscFunctionBegin; 4397 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4398 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4399 /* Should this be the type of the diagonal block of A? */ 4400 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4401 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4402 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4403 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4404 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4405 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4406 for (i=0; i<m; i++) { 4407 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4408 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4409 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4410 } 4411 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4412 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4413 4414 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4415 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4416 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4417 sprintf(name,"%s.%d",outfile,rank); 4418 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4419 ierr = PetscFree(name);CHKERRQ(ierr); 4420 ierr = MatView(B,out);CHKERRQ(ierr); 4421 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4422 ierr = MatDestroy(&B);CHKERRQ(ierr); 4423 PetscFunctionReturn(0); 4424 } 4425 4426 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4427 { 4428 PetscErrorCode ierr; 4429 Mat_Merge_SeqsToMPI *merge; 4430 PetscContainer container; 4431 4432 PetscFunctionBegin; 4433 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4434 if (container) { 4435 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4436 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4437 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4438 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4439 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4440 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4441 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4442 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4443 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4444 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4445 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4446 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4447 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4448 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4449 ierr = PetscFree(merge);CHKERRQ(ierr); 4450 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4451 } 4452 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4453 PetscFunctionReturn(0); 4454 } 4455 4456 #include <../src/mat/utils/freespace.h> 4457 #include <petscbt.h> 4458 4459 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4460 { 4461 PetscErrorCode ierr; 4462 MPI_Comm comm; 4463 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4464 PetscMPIInt 
size,rank,taga,*len_s; 4465 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4466 PetscInt proc,m; 4467 PetscInt **buf_ri,**buf_rj; 4468 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4469 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4470 MPI_Request *s_waits,*r_waits; 4471 MPI_Status *status; 4472 MatScalar *aa=a->a; 4473 MatScalar **abuf_r,*ba_i; 4474 Mat_Merge_SeqsToMPI *merge; 4475 PetscContainer container; 4476 4477 PetscFunctionBegin; 4478 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4479 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4480 4481 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4482 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4483 4484 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4485 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4486 4487 bi = merge->bi; 4488 bj = merge->bj; 4489 buf_ri = merge->buf_ri; 4490 buf_rj = merge->buf_rj; 4491 4492 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4493 owners = merge->rowmap->range; 4494 len_s = merge->len_s; 4495 4496 /* send and recv matrix values */ 4497 /*-----------------------------*/ 4498 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4499 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4500 4501 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4502 for (proc=0,k=0; proc<size; proc++) { 4503 if (!len_s[proc]) continue; 4504 i = owners[proc]; 4505 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4506 k++; 4507 } 4508 4509 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4510 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4511 ierr = PetscFree(status);CHKERRQ(ierr); 4512 4513 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4514 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4515 4516 /* insert mat values of mpimat */ 4517 /*----------------------------*/ 4518 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4519 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4520 4521 for (k=0; k<merge->nrecv; k++) { 4522 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4523 nrows = *(buf_ri_k[k]); 4524 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4525 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4526 } 4527 4528 /* set values of ba */ 4529 m = merge->rowmap->n; 4530 for (i=0; i<m; i++) { 4531 arow = owners[rank] + i; 4532 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4533 bnzi = bi[i+1] - bi[i]; 4534 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4535 4536 /* add local non-zero vals of this proc's seqmat into ba */ 4537 anzi = ai[arow+1] - ai[arow]; 4538 aj = a->j + ai[arow]; 4539 aa = a->a + ai[arow]; 4540 nextaj = 0; 4541 for (j=0; nextaj<anzi; j++) { 4542 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4543 ba_i[j] += aa[nextaj++]; 4544 } 4545 } 4546 4547 /* add received vals into ba */ 4548 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4549 /* i-th row */ 4550 if (i == *nextrow[k]) { 4551 anzi = *(nextai[k]+1) - *nextai[k]; 4552 aj = buf_rj[k] + *(nextai[k]); 4553 aa = abuf_r[k] + *(nextai[k]); 4554 nextaj = 0; 4555 for (j=0; nextaj<anzi; j++) { 4556 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4557 ba_i[j] += aa[nextaj++]; 4558 } 4559 } 4560 nextrow[k]++; nextai[k]++; 4561 } 4562 } 4563 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4564 } 4565 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4566 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4567 4568 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4569 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4570 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4571 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4572 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4573 PetscFunctionReturn(0); 4574 } 4575 4576 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4577 { 4578 PetscErrorCode ierr; 4579 Mat B_mpi; 4580 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4581 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4582 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4583 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4584 PetscInt len,proc,*dnz,*onz,bs,cbs; 4585 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4586 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4587 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4588 MPI_Status *status; 4589 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4590 PetscBT lnkbt; 4591 Mat_Merge_SeqsToMPI *merge; 4592 PetscContainer container; 4593 4594 PetscFunctionBegin; 4595 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4596 4597 /* make sure it is a PETSc comm */ 4598 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4599 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4600 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4601 4602 ierr = PetscNew(&merge);CHKERRQ(ierr); 4603 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4604 4605 /* determine row ownership */ 4606 /*---------------------------------------------------------*/ 4607 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4608 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4609 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4610 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4611 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4612 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4613 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4614 4615 m = merge->rowmap->n; 4616 owners = merge->rowmap->range; 4617 4618 /* determine the number of messages to send, their lengths */ 4619 /*---------------------------------------------------------*/ 4620 len_s = merge->len_s; 4621 4622 len = 0; /* length of buf_si[] */ 4623 merge->nsend = 0; 4624 for (proc=0; proc<size; proc++) { 4625 len_si[proc] = 0; 4626 if (proc == rank) { 4627 len_s[proc] = 0; 4628 } else { 4629 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4630 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4631 } 4632 if (len_s[proc]) { 4633 merge->nsend++; 4634 nrows = 0; 4635 for (i=owners[proc]; i<owners[proc+1]; i++) { 4636 if (ai[i+1] > ai[i]) nrows++; 4637 } 4638 len_si[proc] = 2*(nrows+1); 4639 len += len_si[proc]; 4640 } 4641 } 4642 4643 /* determine the number and length of messages to receive for ij-structure */ 4644 /*-------------------------------------------------------------------------*/ 4645 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4646 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4647 4648 /* post the Irecv of j-structure */ 4649 /*-------------------------------*/ 4650 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4651 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4652 4653 /* post the Isend of j-structure */ 4654 /*--------------------------------*/ 4655 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4656 4657 for (proc=0, k=0; proc<size; proc++) { 4658 if (!len_s[proc]) continue; 4659 i = owners[proc]; 4660 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4661 k++; 4662 } 4663 4664 /* receives and sends of j-structure are complete */ 4665 /*------------------------------------------------*/ 4666 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4667 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4668 4669 /* send and recv i-structure */ 4670 /*---------------------------*/ 4671 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4672 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4673 4674 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4675 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4676 for (proc=0,k=0; proc<size; proc++) { 4677 if (!len_s[proc]) continue; 4678 /* form outgoing message for i-structure: 4679 buf_si[0]: nrows to be sent 4680 [1:nrows]: row index (global) 4681 [nrows+1:2*nrows+1]: i-structure index 4682 */ 4683 /*-------------------------------------------*/ 4684 nrows = len_si[proc]/2 - 1; 4685 buf_si_i = buf_si + nrows+1; 4686 buf_si[0] = nrows; 4687 buf_si_i[0] = 0; 4688 nrows = 0; 4689 for (i=owners[proc]; i<owners[proc+1]; i++) { 4690 anzi = ai[i+1] - ai[i]; 4691 if (anzi) { 4692 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4693 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4694 nrows++; 4695 } 4696 } 4697 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4698 k++; 4699 buf_si += len_si[proc]; 4700 } 4701 4702 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4703 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4704 4705 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4706 for (i=0; i<merge->nrecv; i++) { 4707 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4708 } 4709 4710 ierr = PetscFree(len_si);CHKERRQ(ierr); 4711 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4712 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4713 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4714 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4715 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4716 ierr = PetscFree(status);CHKERRQ(ierr); 4717 4718 /* compute a local seq matrix in each processor */ 4719 /*----------------------------------------------*/ 4720 /* allocate bi array and free space for accumulating nonzero column info */ 4721 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4722 bi[0] = 0; 4723 4724 /* create and initialize a linked list */ 4725 nlnk = N+1; 4726 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4727 4728 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4729 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4730 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4731 4732 current_space = free_space; 4733 4734 /* determine symbolic info for each local row */ 4735 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4736 4737 for (k=0; k<merge->nrecv; k++) { 4738 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4739 nrows = *buf_ri_k[k]; 4740 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4741 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4742 } 4743 4744 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4745 len = 0; 4746 for (i=0; i<m; i++) { 4747 bnzi = 0; 4748 /* add local non-zero cols of this proc's seqmat into lnk */ 4749 arow = owners[rank] + i; 4750 anzi = ai[arow+1] - ai[arow]; 4751 aj = a->j + ai[arow]; 4752 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4753 bnzi += nlnk; 4754 /* add received col data into lnk */ 4755 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4756 if (i == *nextrow[k]) { /* i-th row */ 4757 anzi = *(nextai[k]+1) - *nextai[k]; 4758 aj = buf_rj[k] + *nextai[k]; 4759 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4760 bnzi += nlnk; 4761 nextrow[k]++; nextai[k]++; 4762 } 4763 } 4764 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4765 4766 /* if free space is not available, make more free space */ 4767 if (current_space->local_remaining<bnzi) { 4768 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4769 nspacedouble++; 4770 } 4771 /* copy data into free space, then initialize lnk */ 4772 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4773 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4774 4775 current_space->array += bnzi; 4776 current_space->local_used += bnzi; 4777 current_space->local_remaining -= bnzi; 4778 4779 bi[i+1] = bi[i] + bnzi; 4780 } 4781 4782 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4783 4784 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4785 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4786 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4787 4788 /* create symbolic parallel matrix B_mpi */ 4789 /*---------------------------------------*/ 4790 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4791 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4792 if (n==PETSC_DECIDE) { 4793 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4794 } else { 4795 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4796 } 4797 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4798 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4799 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4800 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4801 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4802 4803 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4804 B_mpi->assembled = PETSC_FALSE; 4805 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4806 merge->bi = bi; 4807 merge->bj = bj; 4808 merge->buf_ri = buf_ri; 4809 merge->buf_rj = buf_rj; 4810 merge->coi = NULL; 4811 merge->coj = NULL; 4812 merge->owners_co = NULL; 4813 4814 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4815 4816 /* attach the 
supporting struct to B_mpi for reuse */ 4817 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4818 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4819 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4820 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4821 *mpimat = B_mpi; 4822 4823 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4824 PetscFunctionReturn(0); 4825 } 4826 4827 /*@C 4828 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4829 matrices from each processor 4830 4831 Collective on MPI_Comm 4832 4833 Input Parameters: 4834 + comm - the communicator the parallel matrix will live on 4835 . seqmat - the input sequential matrix 4836 . m - number of local rows (or PETSC_DECIDE) 4837 . n - number of local columns (or PETSC_DECIDE) 4838 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4839 4840 Output Parameter: 4841 . mpimat - the parallel matrix generated 4842 4843 Level: advanced 4844 4845 Notes: 4846 The dimensions of the sequential matrix in each processor MUST be the same. 4847 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4848 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4849 @*/ 4850 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4851 { 4852 PetscErrorCode ierr; 4853 PetscMPIInt size; 4854 4855 PetscFunctionBegin; 4856 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4857 if (size == 1) { 4858 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4859 if (scall == MAT_INITIAL_MATRIX) { 4860 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4861 } else { 4862 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4863 } 4864 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4865 PetscFunctionReturn(0); 4866 } 4867 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4868 if (scall == MAT_INITIAL_MATRIX) { 4869 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4870 } 4871 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4872 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4873 PetscFunctionReturn(0); 4874 } 4875 4876 /*@ 4877 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4878 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4879 with MatGetSize(). 4880 4881 Not Collective 4882 4883 Input Parameters: 4884 + A - the matrix 4885 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4886 4887 Output Parameter: 4888 .
A_loc - the local sequential matrix generated 4889 4890 Level: developer 4891 4892 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4893 4894 @*/ 4895 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4896 { 4897 PetscErrorCode ierr; 4898 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4899 Mat_SeqAIJ *mat,*a,*b; 4900 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4901 MatScalar *aa,*ba,*cam; 4902 PetscScalar *ca; 4903 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4904 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4905 PetscBool match; 4906 MPI_Comm comm; 4907 PetscMPIInt size; 4908 4909 PetscFunctionBegin; 4910 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4911 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4912 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4913 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4914 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4915 4916 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4917 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4918 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4919 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4920 aa = a->a; ba = b->a; 4921 if (scall == MAT_INITIAL_MATRIX) { 4922 if (size == 1) { 4923 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4924 PetscFunctionReturn(0); 4925 } 4926 4927 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4928 ci[0] = 0; 4929 for (i=0; i<am; i++) { 4930 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4931 } 4932 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4933 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4934 k = 0; 4935 for (i=0; i<am; i++) { 4936 ncols_o = bi[i+1] - bi[i]; 4937 ncols_d = ai[i+1] - ai[i]; 4938 /* off-diagonal portion of A */ 4939 for (jo=0; jo<ncols_o; jo++) { 4940 col = cmap[*bj]; 4941 if (col >= cstart) break; 4942 cj[k] = col; bj++; 4943 ca[k++] = *ba++; 4944 } 4945 /* diagonal portion of A */ 4946 for (j=0; j<ncols_d; j++) { 4947 cj[k] = cstart + *aj++; 4948 ca[k++] = *aa++; 4949 } 4950 /* off-diagonal portion of A */ 4951 for (j=jo; j<ncols_o; j++) { 4952 cj[k] = cmap[*bj++]; 4953 ca[k++] = *ba++; 4954 } 4955 } 4956 /* put together the new matrix */ 4957 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4958 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4959 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4960 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4961 mat->free_a = PETSC_TRUE; 4962 mat->free_ij = PETSC_TRUE; 4963 mat->nonew = 0; 4964 } else if (scall == MAT_REUSE_MATRIX) { 4965 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4966 ci = mat->i; cj = mat->j; cam = mat->a; 4967 for (i=0; i<am; i++) { 4968 /* off-diagonal portion of A */ 4969 ncols_o = bi[i+1] - bi[i]; 4970 for (jo=0; jo<ncols_o; jo++) { 4971 col = cmap[*bj]; 4972 if (col >= cstart) break; 4973 *cam++ = *ba++; bj++; 4974 } 4975 /* diagonal portion of A */ 4976 ncols_d = ai[i+1] - ai[i]; 4977 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4978 /* off-diagonal portion of A */ 4979 for (j=jo; j<ncols_o; j++) { 4980 *cam++ = *ba++; bj++; 4981 } 4982 } 4983 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4984 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4985 PetscFunctionReturn(0); 4986 } 4987 4988 /*@C 4989 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4990 4991 Not Collective 4992 4993 Input Parameters: 4994 + A - the matrix 4995 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4996 - row, col - index sets of rows and columns to extract (or NULL) 4997 4998 Output Parameter: 4999 . A_loc - the local sequential matrix generated 5000 5001 Level: developer 5002 5003 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5004 5005 @*/ 5006 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5007 { 5008 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5009 PetscErrorCode ierr; 5010 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5011 IS isrowa,iscola; 5012 Mat *aloc; 5013 PetscBool match; 5014 5015 PetscFunctionBegin; 5016 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5017 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5018 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5019 if (!row) { 5020 start = A->rmap->rstart; end = A->rmap->rend; 5021 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5022 } else { 5023 isrowa = *row; 5024 } 5025 if (!col) { 5026 start = A->cmap->rstart; 5027 cmap = a->garray; 5028 nzA = a->A->cmap->n; 5029 nzB = a->B->cmap->n; 5030 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5031 ncols = 0; 5032 for (i=0; i<nzB; i++) { 5033 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5034 else break; 5035 } 5036 imark = i; 5037 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5038 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5039 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5040 } else { 5041 iscola = *col; 5042 } 5043 if (scall != MAT_INITIAL_MATRIX) { 5044 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5045 aloc[0] = *A_loc; 5046 } 5047 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5048 *A_loc = aloc[0]; 5049 ierr = PetscFree(aloc);CHKERRQ(ierr); 5050 if (!row) { 5051 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5052 } 5053 if (!col) { 5054 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5055 } 5056 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5057 PetscFunctionReturn(0); 5058 } 5059 5060 /*@C 5061 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5062 5063 Collective on Mat 5064 5065 Input Parameters: 5066 + A,B - the matrices in mpiaij format 5067 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5068 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5069 5070 Output Parameter: 5071 + rowb, colb - index sets of rows and columns of B to extract 5072 - B_seq - the sequential matrix generated 5073 5074 Level: developer 5075 5076 @*/ 5077 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5078 { 5079 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5080 PetscErrorCode ierr; 5081 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5082 IS isrowb,iscolb; 5083 Mat *bseq=NULL; 5084 5085 PetscFunctionBegin; 5086 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5087 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5088 } 5089 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5090 5091 if (scall == MAT_INITIAL_MATRIX) { 5092 start = A->cmap->rstart; 5093 cmap = a->garray; 5094 nzA = a->A->cmap->n; 5095 nzB = a->B->cmap->n; 5096 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5097 ncols = 0; 5098 for (i=0; i<nzB; i++) { /* row < local row index */ 5099 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5100 else break; 5101 } 5102 imark = i; 5103 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5104 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5105 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5106 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5107 } else { 5108 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5109 isrowb = *rowb; iscolb = *colb; 5110 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5111 bseq[0] = *B_seq; 5112 } 5113 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5114 *B_seq = bseq[0]; 5115 ierr = PetscFree(bseq);CHKERRQ(ierr); 5116 if (!rowb) { 5117 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5118 } else { 5119 *rowb = isrowb; 5120 } 5121 if (!colb) { 5122 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5123 } else { 5124 *colb = iscolb; 5125 } 5126 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5127 PetscFunctionReturn(0); 5128 } 5129 5130 /* 5131 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5132 of the OFF-DIAGONAL portion of local A 5133 5134 Collective on Mat 5135 5136 Input Parameters: 5137 + A,B - the matrices in mpiaij format 5138 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5139 5140 Output Parameter: 5141 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5142 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5143 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5144 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5145 5146 Level: developer 5147 5148 */ 5149 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5150 { 5151 VecScatter_MPI_General *gen_to,*gen_from; 5152 PetscErrorCode ierr; 5153 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5154 Mat_SeqAIJ *b_oth; 5155 VecScatter ctx; 5156 MPI_Comm comm; 5157 PetscMPIInt *rprocs,*sprocs,tag,rank; 5158 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5159 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5160 PetscScalar *b_otha,*bufa,*bufA,*vals; 5161 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5162 MPI_Request *rwaits = NULL,*swaits = NULL; 5163 MPI_Status *sstatus,rstatus; 5164 PetscMPIInt jj,size; 5165 VecScatterType type; 5166 PetscBool mpi1; 5167 5168 PetscFunctionBegin; 5169 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5170 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5171 5172 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5173 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5174 } 5175 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5176 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5177 5178 if (size == 1) { 5179 startsj_s = NULL; 5180 bufa_ptr = NULL; 5181 *B_oth = NULL; 5182 PetscFunctionReturn(0); 5183 } 5184 5185 ctx = a->Mvctx; 5186 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5187 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5188 if (!mpi1) { 5189 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5190 thus create a->Mvctx_mpi1 */ 5191 if (!a->Mvctx_mpi1) { 5192 a->Mvctx_mpi1_flg = PETSC_TRUE; 5193 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5194 } 5195 ctx = a->Mvctx_mpi1; 5196 } 5197 tag = ((PetscObject)ctx)->tag; 5198 5199 gen_to = (VecScatter_MPI_General*)ctx->todata; 5200 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5201 nrecvs = gen_from->n; 5202 nsends = gen_to->n; 5203 5204 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5205 srow = gen_to->indices; /* local row index to be sent */ 5206 sstarts = gen_to->starts; 5207 sprocs = gen_to->procs; 5208 sstatus = gen_to->sstatus; 5209 sbs = gen_to->bs; 5210 rstarts = gen_from->starts; 5211 rprocs = gen_from->procs; 5212 rbs = gen_from->bs; 5213 5214 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5215 if (scall == MAT_INITIAL_MATRIX) { 5216 /* i-array */ 5217 /*---------*/ 5218 /* post receives */ 5219 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5220 for (i=0; i<nrecvs; i++) { 5221 rowlen = rvalues + rstarts[i]*rbs; 5222 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5223 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5224 } 5225 5226 /* pack the outgoing message */ 5227 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5228 5229 sstartsj[0] = 0; 5230 rstartsj[0] = 0; 5231 len = 0; /* total length of j or a array to be sent */ 5232 k = 0; 5233 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5234 for (i=0; i<nsends; i++) { 5235 rowlen = svalues + sstarts[i]*sbs; 5236 
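/* for message i, record the length (number of nonzeros) of every locally owned row of B that will be sent to sprocs[i]; the receiver uses these counts to build the row offsets of B_oth */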
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5237 for (j=0; j<nrows; j++) { 5238 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5239 for (l=0; l<sbs; l++) { 5240 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5241 5242 rowlen[j*sbs+l] = ncols; 5243 5244 len += ncols; 5245 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5246 } 5247 k++; 5248 } 5249 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5250 5251 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5252 } 5253 /* recvs and sends of i-array are completed */ 5254 i = nrecvs; 5255 while (i--) { 5256 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5257 } 5258 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5259 ierr = PetscFree(svalues);CHKERRQ(ierr); 5260 5261 /* allocate buffers for sending j and a arrays */ 5262 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5263 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5264 5265 /* create i-array of B_oth */ 5266 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5267 5268 b_othi[0] = 0; 5269 len = 0; /* total length of j or a array to be received */ 5270 k = 0; 5271 for (i=0; i<nrecvs; i++) { 5272 rowlen = rvalues + rstarts[i]*rbs; 5273 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5274 for (j=0; j<nrows; j++) { 5275 b_othi[k+1] = b_othi[k] + rowlen[j]; 5276 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5277 k++; 5278 } 5279 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5280 } 5281 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5282 5283 /* allocate space for j and a arrrays of B_oth */ 5284 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5285 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5286 5287 /* j-array */ 5288 /*---------*/ 5289 /* post receives of j-array */ 5290 for (i=0; i<nrecvs; i++) { 5291 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5292 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5293 } 5294 5295 /* pack the outgoing message j-array */ 5296 k = 0; 5297 for (i=0; i<nsends; i++) { 5298 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5299 bufJ = bufj+sstartsj[i]; 5300 for (j=0; j<nrows; j++) { 5301 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5302 for (ll=0; ll<sbs; ll++) { 5303 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5304 for (l=0; l<ncols; l++) { 5305 *bufJ++ = cols[l]; 5306 } 5307 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5308 } 5309 } 5310 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5311 } 5312 5313 /* recvs and sends of j-array are completed */ 5314 i = nrecvs; 5315 while (i--) { 5316 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5317 } 5318 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5319 } else if (scall == MAT_REUSE_MATRIX) { 5320 sstartsj = *startsj_s; 5321 rstartsj = *startsj_r; 5322 bufa = *bufa_ptr; 5323 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5324 b_otha = b_oth->a; 5325 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5326 5327 /* a-array */ 5328 /*---------*/ 5329 /* post receives of a-array */ 5330 for (i=0; i<nrecvs; i++) { 5331 nrows = rstartsj[i+1]-rstartsj[i]; /* length 
of the msg received */ 5332 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5333 } 5334 5335 /* pack the outgoing message a-array */ 5336 k = 0; 5337 for (i=0; i<nsends; i++) { 5338 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5339 bufA = bufa+sstartsj[i]; 5340 for (j=0; j<nrows; j++) { 5341 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5342 for (ll=0; ll<sbs; ll++) { 5343 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5344 for (l=0; l<ncols; l++) { 5345 *bufA++ = vals[l]; 5346 } 5347 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5348 } 5349 } 5350 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5351 } 5352 /* recvs and sends of a-array are completed */ 5353 i = nrecvs; 5354 while (i--) { 5355 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5356 } 5357 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5358 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5359 5360 if (scall == MAT_INITIAL_MATRIX) { 5361 /* put together the new matrix */ 5362 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5363 5364 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5365 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5366 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5367 b_oth->free_a = PETSC_TRUE; 5368 b_oth->free_ij = PETSC_TRUE; 5369 b_oth->nonew = 0; 5370 5371 ierr = PetscFree(bufj);CHKERRQ(ierr); 5372 if (!startsj_s || !bufa_ptr) { 5373 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5374 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5375 } else { 5376 *startsj_s = sstartsj; 5377 *startsj_r = rstartsj; 5378 *bufa_ptr = bufa; 5379 } 5380 } 5381 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5382 PetscFunctionReturn(0); 5383 } 5384 5385 /*@C 5386 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5387 5388 Not Collective 5389 5390 Input Parameters: 5391 . A - The matrix in mpiaij format 5392 5393 Output Parameter: 5394 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5395 . 
colmap - A map from global column index to local index into lvec 5396 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5397 5398 Level: developer 5399 5400 @*/ 5401 #if defined(PETSC_USE_CTABLE) 5402 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5403 #else 5404 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5405 #endif 5406 { 5407 Mat_MPIAIJ *a; 5408 5409 PetscFunctionBegin; 5410 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5411 PetscValidPointer(lvec, 2); 5412 PetscValidPointer(colmap, 3); 5413 PetscValidPointer(multScatter, 4); 5414 a = (Mat_MPIAIJ*) A->data; 5415 if (lvec) *lvec = a->lvec; 5416 if (colmap) *colmap = a->colmap; 5417 if (multScatter) *multScatter = a->Mvctx; 5418 PetscFunctionReturn(0); 5419 } 5420 5421 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5422 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5423 #if defined(PETSC_HAVE_MKL_SPARSE) 5424 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5425 #endif 5426 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5427 #if defined(PETSC_HAVE_ELEMENTAL) 5428 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5429 #endif 5430 #if defined(PETSC_HAVE_HYPRE) 5431 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5432 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5433 #endif 5434 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5435 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5436 5437 /* 5438 Computes (B'*A')' since computing B*A directly is untenable 5439 5440 n p p 5441 ( ) ( ) ( ) 5442 m ( A ) * n ( B ) = m ( C ) 5443 ( ) ( ) ( ) 5444 5445 */ 5446 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5447 { 5448 PetscErrorCode ierr; 5449 Mat At,Bt,Ct; 5450 5451 PetscFunctionBegin; 5452 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5453 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5454 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5455 ierr = MatDestroy(&At);CHKERRQ(ierr); 5456 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5457 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5458 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5459 PetscFunctionReturn(0); 5460 } 5461 5462 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5463 { 5464 PetscErrorCode ierr; 5465 PetscInt m=A->rmap->n,n=B->cmap->n; 5466 Mat Cmat; 5467 5468 PetscFunctionBegin; 5469 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5470 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5471 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5472 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5473 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5474 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5475 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5476 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5477 5478 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5479 5480 *C = Cmat; 5481 PetscFunctionReturn(0); 5482 } 5483 5484 /* 
----------------------------------------------------------------*/ 5485 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5486 { 5487 PetscErrorCode ierr; 5488 5489 PetscFunctionBegin; 5490 if (scall == MAT_INITIAL_MATRIX) { 5491 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5492 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5493 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5494 } 5495 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5496 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5497 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5498 PetscFunctionReturn(0); 5499 } 5500 5501 /*MC 5502 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5503 5504 Options Database Keys: 5505 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5506 5507 Level: beginner 5508 5509 .seealso: MatCreateAIJ() 5510 M*/ 5511 5512 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5513 { 5514 Mat_MPIAIJ *b; 5515 PetscErrorCode ierr; 5516 PetscMPIInt size; 5517 5518 PetscFunctionBegin; 5519 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5520 5521 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5522 B->data = (void*)b; 5523 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5524 B->assembled = PETSC_FALSE; 5525 B->insertmode = NOT_SET_VALUES; 5526 b->size = size; 5527 5528 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5529 5530 /* build cache for off array entries formed */ 5531 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5532 5533 b->donotstash = PETSC_FALSE; 5534 b->colmap = 0; 5535 b->garray = 0; 5536 b->roworiented = PETSC_TRUE; 5537 5538 /* stuff used for matrix vector multiply */ 5539 b->lvec = NULL; 5540 b->Mvctx = NULL; 5541 5542 /* stuff for MatGetRow() */ 5543 b->rowindices = 0; 5544 b->rowvalues = 0; 5545 b->getrowactive = PETSC_FALSE; 5546 5547 /* flexible pointer used in CUSP/CUSPARSE classes */ 5548 b->spptr = NULL; 5549 5550 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5551 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5552 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5553 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5554 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5555 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5556 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5557 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5558 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5559 #if defined(PETSC_HAVE_MKL_SPARSE) 5560 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 
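/* each MatConvert_<from>_<to>_C name composed in this routine is the function name MatConvert() queries at run time to locate a direct conversion path */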
5561 #endif 5562 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5563 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5564 #if defined(PETSC_HAVE_ELEMENTAL) 5565 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5566 #endif 5567 #if defined(PETSC_HAVE_HYPRE) 5568 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5569 #endif 5570 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5571 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5572 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5573 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5574 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5575 #if defined(PETSC_HAVE_HYPRE) 5576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5577 #endif 5578 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5579 PetscFunctionReturn(0); 5580 } 5581 5582 /*@C 5583 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5584 and "off-diagonal" part of the matrix in CSR format. 5585 5586 Collective on MPI_Comm 5587 5588 Input Parameters: 5589 + comm - MPI communicator 5590 . m - number of local rows (cannot be PETSC_DECIDE) 5591 . n - This value should be the same as the local size used in creating the 5592 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 5593 it calculated if N is given). For square matrices n is almost always m. 5594 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5595 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5596 . i - row indices for "diagonal" portion of matrix 5597 . j - column indices 5598 . a - matrix values 5599 . oi - row indices for "off-diagonal" portion of matrix 5600 . oj - column indices 5601 - oa - matrix values 5602 5603 Output Parameter: 5604 . mat - the matrix 5605 5606 Level: advanced 5607 5608 Notes: 5609 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5610 must free the arrays once the matrix has been destroyed and not before. 5611 5612 The i and j indices are 0 based. 5613 5614 See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix. 5615 5616 This sets local rows and cannot be used to set off-processor values. 5617 5618 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5619 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5620 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5621 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5622 keep track of the underlying arrays.
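   For example, the recommended MatSetValues() based assembly looks roughly like the sketch below
   (illustrative only; the preallocation counts d_nz and o_nz, the loop bounds, and the ncols, cols[],
   and vals[] data are application specific):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
     MatSeqAIJSetPreallocation(A,d_nz,NULL);
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       /* fill ncols, cols[], vals[] for global row i */
       MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve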
Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5623 communication if it is known that only local entries will be set. 5624 5625 .keywords: matrix, aij, compressed row, sparse, parallel 5626 5627 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5628 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5629 @*/ 5630 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5631 { 5632 PetscErrorCode ierr; 5633 Mat_MPIAIJ *maij; 5634 5635 PetscFunctionBegin; 5636 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5637 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5638 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5639 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5640 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5641 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5642 maij = (Mat_MPIAIJ*) (*mat)->data; 5643 5644 (*mat)->preallocated = PETSC_TRUE; 5645 5646 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5647 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5648 5649 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5650 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5651 5652 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5653 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5654 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5655 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5656 5657 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5658 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5659 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5660 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5661 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5662 PetscFunctionReturn(0); 5663 } 5664 5665 /* 5666 Special version for direct calls from Fortran 5667 */ 5668 #include <petsc/private/fortranimpl.h> 5669 5670 /* Change these macros so can be used in void function */ 5671 #undef CHKERRQ 5672 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5673 #undef SETERRQ2 5674 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5675 #undef SETERRQ3 5676 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5677 #undef SETERRQ 5678 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5679 5680 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5681 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5682 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5683 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5684 #else 5685 #endif 5686 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5687 { 5688 Mat mat = *mmat; 5689 PetscInt m = *mm, n = *mn; 5690 InsertMode addv = *maddv; 5691 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5692 PetscScalar value; 5693 PetscErrorCode ierr; 5694 5695 MatCheckPreallocated(mat,1); 5696 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5697 5698 #if 
defined(PETSC_USE_DEBUG) 5699 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5700 #endif 5701 { 5702 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5703 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5704 PetscBool roworiented = aij->roworiented; 5705 5706 /* Some Variables required in the macro */ 5707 Mat A = aij->A; 5708 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5709 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5710 MatScalar *aa = a->a; 5711 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5712 Mat B = aij->B; 5713 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5714 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5715 MatScalar *ba = b->a; 5716 5717 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5718 PetscInt nonew = a->nonew; 5719 MatScalar *ap1,*ap2; 5720 5721 PetscFunctionBegin; 5722 for (i=0; i<m; i++) { 5723 if (im[i] < 0) continue; 5724 #if defined(PETSC_USE_DEBUG) 5725 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5726 #endif 5727 if (im[i] >= rstart && im[i] < rend) { 5728 row = im[i] - rstart; 5729 lastcol1 = -1; 5730 rp1 = aj + ai[row]; 5731 ap1 = aa + ai[row]; 5732 rmax1 = aimax[row]; 5733 nrow1 = ailen[row]; 5734 low1 = 0; 5735 high1 = nrow1; 5736 lastcol2 = -1; 5737 rp2 = bj + bi[row]; 5738 ap2 = ba + bi[row]; 5739 rmax2 = bimax[row]; 5740 nrow2 = bilen[row]; 5741 low2 = 0; 5742 high2 = nrow2; 5743 5744 for (j=0; j<n; j++) { 5745 if (roworiented) value = v[i*n+j]; 5746 else value = v[i+j*m]; 5747 if (in[j] >= cstart && in[j] < cend) { 5748 col = in[j] - cstart; 5749 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5750 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5751 } else if (in[j] < 0) continue; 5752 #if defined(PETSC_USE_DEBUG) 5753 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5754 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5755 #endif 5756 else { 5757 if (mat->was_assembled) { 5758 if (!aij->colmap) { 5759 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5760 } 5761 #if defined(PETSC_USE_CTABLE) 5762 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5763 col--; 5764 #else 5765 col = aij->colmap[in[j]] - 1; 5766 #endif 5767 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5768 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5769 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5770 col = in[j]; 5771 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5772 B = aij->B; 5773 b = (Mat_SeqAIJ*)B->data; 5774 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5775 rp2 = bj + bi[row]; 5776 ap2 = ba + bi[row]; 5777 rmax2 = bimax[row]; 5778 nrow2 = bilen[row]; 5779 low2 = 0; 5780 high2 = nrow2; 5781 bm = aij->B->rmap->n; 5782 ba = b->a; 5783 } 5784 } else col = in[j]; 5785 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5786 } 5787 } 5788 } else if (!aij->donotstash) { 5789 if (roworiented) { 5790 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && 
(addv == ADD_VALUES)));CHKERRQ(ierr); 5791 } else { 5792 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5793 } 5794 } 5795 } 5796 } 5797 PetscFunctionReturnVoid(); 5798 } 5799 5800