#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also
    automatically switches over to use inode routines when enough inodes exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
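/*
   A minimal usage sketch of the pattern recommended above: calling both preallocation
   routines so the same code works for any communicator size. The global size and the
   per-row nonzero estimates below are illustrative placeholders, not part of this file.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          (takes effect on one process)
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   (takes effect on several processes)
*/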
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
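/*
   A usage sketch (illustrative, not part of this file): since the reduction above
   delivers the result to every process, the caller's norms array must hold one
   entry per GLOBAL column.

     PetscInt  N;
     PetscReal *norms;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/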
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = 0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processes */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
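/*
   A hedged caller sketch (illustrative only): gmat is a SeqAIJ matrix that is
   complete on rank 0 (only rank 0 dereferences it above), m is the desired number
   of local rows on this process, and the MAT_REUSE_MATRIX branch only refreshes
   the numerical values of a previously distributed matrix.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ... change numerical values of gmat on rank 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/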
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
  (at a slightly higher hash table cost); without it, it is not scalable
  (each process has an order-N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
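/*
   A lookup sketch matching the two storage schemes above (it mirrors the idiom used
   in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below). Keys and values are
   stored shifted by +1 so that 0 can mean "absent"; a result of col < 0 therefore
   means the global column gcol has no local counterpart in the off-diagonal block.

     PetscInt col;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/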
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1)  low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol2)  low2 = 0; \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping insertion of a new nonzero location in the off-diagonal portion of the matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
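/*
   A usage sketch (illustrative): entries whose rows live on other processes are
   stashed above and only communicated during assembly, so every sequence of calls
   must end with the assembly pair before the matrix is used.

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);   (grow may be off-process)
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/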
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       *lrows;
  PetscInt       r,len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare the row and column layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
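/*
   A usage sketch (illustrative, e.g. for imposing Dirichlet boundary conditions):
   the listed rows may be owned by any process, each zeroed row receives diag on
   the diagonal, and passing the solution/right-hand-side pair also adjusts b so
   that x keeps its boundary values.

     PetscInt rows[] = {0,1};   (global row numbers; placeholder values)
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/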
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n,&lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N,&rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,n,N,NULL,PETSC_OWN_POINTER,rrows,PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* the values were actually received in the Begin() but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
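/*
   A usage sketch (illustrative): the vector must be compatible with the matrix
   layout, which MatCreateVecs() guarantees; for this routine the row and column
   partitions must also match, as checked above.

     Vec d;
     ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = VecDestroy(&d);CHKERRQ(ierr);
*/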
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range = 0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any process has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
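/*
   A usage sketch (illustrative): writing an MPIAIJ matrix to the PETSc binary format
   handled above, which can later be read back with MatLoad(). The file name is a
   placeholder.

     PetscViewer v;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_WRITE,&v);CHKERRQ(ierr);
     ierr = MatView(A,v);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&v);CHKERRQ(ierr);
*/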
%d\n",(int)PetscAbs(mat->rmap->bs)); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #include <petscdraw.h> 1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1286 PetscErrorCode ierr; 1287 PetscMPIInt rank = aij->rank,size = aij->size; 1288 PetscBool isdraw,iascii,isbinary; 1289 PetscViewer sviewer; 1290 PetscViewerFormat format; 1291 1292 PetscFunctionBegin; 1293 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1294 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1295 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1296 if (iascii) { 1297 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1298 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1299 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1300 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1301 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1302 for (i=0; i<(PetscInt)size; i++) { 1303 nmax = PetscMax(nmax,nz[i]); 1304 nmin = PetscMin(nmin,nz[i]); 1305 navg += nz[i]; 1306 } 1307 ierr = PetscFree(nz);CHKERRQ(ierr); 1308 navg = navg/size; 1309 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1313 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1314 MatInfo info; 1315 PetscBool inodes; 1316 1317 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1318 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1319 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1320 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1321 if (!inodes) { 1322 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1323 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1324 } else { 1325 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1326 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1327 } 1328 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1329 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1330 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1331 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1332 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1333 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1334 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1335 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1338 PetscInt inodecount,inodelimit,*inodes; 1339 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1340 if (inodes) { 1341 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is 
%D\n",inodecount,inodelimit);CHKERRQ(ierr); 1342 } else { 1343 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1344 } 1345 PetscFunctionReturn(0); 1346 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1347 PetscFunctionReturn(0); 1348 } 1349 } else if (isbinary) { 1350 if (size == 1) { 1351 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1352 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1353 } else { 1354 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1355 } 1356 PetscFunctionReturn(0); 1357 } else if (isdraw) { 1358 PetscDraw draw; 1359 PetscBool isnull; 1360 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1361 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1362 if (isnull) PetscFunctionReturn(0); 1363 } 1364 1365 { 1366 /* assemble the entire matrix onto first processor. */ 1367 Mat A; 1368 Mat_SeqAIJ *Aloc; 1369 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1370 MatScalar *a; 1371 1372 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1373 if (!rank) { 1374 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1375 } else { 1376 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1377 } 1378 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1379 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1380 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1381 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1382 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1383 1384 /* copy over the A part */ 1385 Aloc = (Mat_SeqAIJ*)aij->A->data; 1386 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1387 row = mat->rmap->rstart; 1388 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1389 for (i=0; i<m; i++) { 1390 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1391 row++; 1392 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1393 } 1394 aj = Aloc->j; 1395 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1396 1397 /* copy over the B part */ 1398 Aloc = (Mat_SeqAIJ*)aij->B->data; 1399 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1400 row = mat->rmap->rstart; 1401 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1402 ct = cols; 1403 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1404 for (i=0; i<m; i++) { 1405 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1406 row++; 1407 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1408 } 1409 ierr = PetscFree(ct);CHKERRQ(ierr); 1410 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 /* 1413 Everyone has to call to draw the matrix since the graphics waits are 1414 synchronized across all processors that share the PetscDraw object 1415 */ 1416 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1417 if (!rank) { 1418 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1419 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1420 } 1421 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1422 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1423 ierr = MatDestroy(&A);CHKERRQ(ierr); 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1429 { 1430 PetscErrorCode 

PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}
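
/*
   A usage sketch for MatGetGhosts() (illustrative variable names): the returned
   array is a borrowed reference to aij->garray, i.e. the global indices of the
   off-process columns referenced by this rank's off-diagonal block; it must not
   be freed by the caller.

     PetscInt       nghost;
     const PetscInt *ghost;
     ierr = MatGetGhosts(A,&nghost,&ghost);CHKERRQ(ierr);
*/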

PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
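
/*
   Usage sketch for the reductions above (illustrative): MAT_LOCAL returns this
   rank's counts for the two local blocks, MAT_GLOBAL_SUM totals them over the
   communicator, and MAT_GLOBAL_MAX takes per-field maxima.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"total nonzeros %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/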

PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
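
/*
   The canonical calling pattern for the pair above (a sketch; only locally owned
   rows, rstart <= row < rend, may be requested, and each MatGetRow() must be
   matched by a MatRestoreRow() before the next row is fetched):

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;
     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ...  use cols[0..ncols-1] and vals[0..ncols-1]  ...
     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/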

PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data,*Bloc = (Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
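       The forward scatter of rr is started here; the diagonal block (and the
       left scaling of the off-diagonal block) proceed while the messages are in
       flight, and the scatter is completed below, just before the off-diagonal
       block is right-scaled.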
    */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
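
   For example (illustrative indices): if row i of X has global columns {0,3,7}
   and row i of Y has {3,5}, the merged pattern is {0,3,5,7}, so nnz[i] = 4.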
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
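
/*
   Usage sketch for the three cases above (computes Y = a*X + Y):

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

   SAME_NONZERO_PATTERN reduces to two BLAS axpy calls on the stored values of
   the diagonal and off-diagonal blocks; DIFFERENT_NONZERO_PATTERN preallocates
   a union matrix first, which is why the merge-counting helper above exists.
*/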

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* v has one entry per local row */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
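
/*
   A minimal calling sketch for the row-wise reductions above (illustrative
   names): v must share the row layout of A, and the optional idx array receives
   the global column index realizing each row's extremum.

     Vec      rmax;
     PetscInt *loc,m;
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = MatCreateVecs(A,NULL,&rmax);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);
*/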

PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
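
/*
   A usage sketch: the choice can be made programmatically,

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   or from the options database via -mat_increase_overlap_scalable, which is
   processed by MatSetFromOptions_MPIAIJ() below.
*/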

PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d  += rstart;
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       MatRARt_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
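
/*
   The intended calling pattern for the pair above (a sketch): the nonzero
   pattern must be frozen before the values are stored, since only the value
   arrays are saved, not the structure.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
     ...  modify the matrix values, e.g. inside a nonlinear iteration  ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/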

PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  if (oldmat->Mvctx_mpi1) {
    ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
  }

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = -1; /* unused, but compilers complain */

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }
  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns.
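   (ISAllGather() concentrates a copy of the entire index set on every process,
   so per-process memory grows with the global number of selected columns.)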
*/
3012 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3013 {
3014   PetscErrorCode ierr;
3015   IS             iscol_local;
3016   PetscBool      isstride;
3017   PetscMPIInt    lisstride=0,gisstride;
3018 
3019   PetscFunctionBegin;
3020   /* check if we are grabbing all columns */
3021   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3022 
3023   if (isstride) {
3024     PetscInt start,len,mstart,mlen;
3025     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3026     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3027     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3028     if (mstart == start && mlen-mstart == len) lisstride = 1;
3029   }
3030 
3031   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3032   if (gisstride) {
3033     PetscInt N;
3034     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3035     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3036     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3037     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3038   } else {
3039     PetscInt cbs;
3040     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3041     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3042     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3043   }
3044 
3045   *isseq = iscol_local;
3046   PetscFunctionReturn(0);
3047 }
3048 
3049 /*
3050  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3051  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3052 
3053  Input Parameters:
3054    mat - matrix
3055    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3056            i.e., mat->rstart <= isrow[i] < mat->rend
3057    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3058            i.e., mat->cstart <= iscol[i] < mat->cend
3059  Output Parameters:
3060    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3061    iscol_o - sequential column index set for retrieving mat->B
3062    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3063 */
3064 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3065 {
3066   PetscErrorCode ierr;
3067   Vec            x,cmap;
3068   const PetscInt *is_idx;
3069   PetscScalar    *xarray,*cmaparray;
3070   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3071   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3072   Mat            B=a->B;
3073   Vec            lvec=a->lvec,lcmap;
3074   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3075   MPI_Comm       comm;
3076   VecScatter     Mvctx=a->Mvctx;
3077 
3078   PetscFunctionBegin;
3079   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3080   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3081 
3082   /* (1) iscol is a sub-column vector of mat, pad it with '-1.'
to form a full vector x */ 3083 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3084 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3085 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3086 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3087 3088 /* Get start indices */ 3089 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3090 isstart -= ncols; 3091 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3092 3093 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3094 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3095 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3096 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3097 for (i=0; i<ncols; i++) { 3098 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3099 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3100 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3101 } 3102 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3103 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3104 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3105 3106 /* Get iscol_d */ 3107 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3108 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3109 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3110 3111 /* Get isrow_d */ 3112 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3113 rstart = mat->rmap->rstart; 3114 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3115 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3116 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3117 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3118 3119 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3120 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3121 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3122 3123 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3124 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3125 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3126 3127 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3128 3129 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3130 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3131 3132 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3133 /* off-process column indices */ 3134 count = 0; 3135 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3136 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3137 3138 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3139 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3140 for (i=0; i<Bn; i++) { 3141 if (PetscRealPart(xarray[i]) > -1.0) { 3142 idx[count] = i; /* local column index in off-diagonal part B */ 3143 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3144 count++; 3145 } 3146 } 3147 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3148 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3149 3150 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3151 /* cannot ensure iscol_o has same blocksize as iscol! 
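      (iscol_o keeps only the off-process subset of iscol, and that subset need not be block-aligned)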
*/ 3152 3153 ierr = PetscFree(idx);CHKERRQ(ierr); 3154 *garray = cmap1; 3155 3156 ierr = VecDestroy(&x);CHKERRQ(ierr); 3157 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3158 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3159 PetscFunctionReturn(0); 3160 } 3161 3162 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3163 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3164 { 3165 PetscErrorCode ierr; 3166 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3167 Mat M = NULL; 3168 MPI_Comm comm; 3169 IS iscol_d,isrow_d,iscol_o; 3170 Mat Asub = NULL,Bsub = NULL; 3171 PetscInt n; 3172 3173 PetscFunctionBegin; 3174 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3175 3176 if (call == MAT_REUSE_MATRIX) { 3177 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3178 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3179 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3180 3181 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3182 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3183 3184 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3185 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3186 3187 /* Update diagonal and off-diagonal portions of submat */ 3188 asub = (Mat_MPIAIJ*)(*submat)->data; 3189 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3190 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3191 if (n) { 3192 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3193 } 3194 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3195 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3196 3197 } else { /* call == MAT_INITIAL_MATRIX) */ 3198 const PetscInt *garray; 3199 PetscInt BsubN; 3200 3201 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
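     These index sets select the pieces of the diagonal block a->A and the off-diagonal block a->B,
     so the requested submatrix can be assembled from two sequential extractions.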
*/
3202     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3203 
3204     /* Create local submatrices Asub and Bsub */
3205     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3206     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3207 
3208     /* Create submatrix M */
3209     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3210 
3211     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3212     asub = (Mat_MPIAIJ*)M->data;
3213 
3214     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3215     n = asub->B->cmap->N;
3216     if (BsubN > n) {
3217       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3218       const PetscInt *idx;
3219       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3220       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3221 
3222       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3223       j = 0;
3224       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3225       for (i=0; i<n; i++) {
3226         if (j >= BsubN) break;
3227         while (subgarray[i] > garray[j]) j++;
3228 
3229         if (subgarray[i] == garray[j]) {
3230           idx_new[i] = idx[j++];
3231         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3232       }
3233       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3234 
3235       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3236       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3237 
3238     } else if (BsubN < n) {
3239       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
3240     }
3241 
3242     ierr = PetscFree(garray);CHKERRQ(ierr);
3243     *submat = M;
3244 
3245     /* Save isrow_d, iscol_d and iscol_o on this process for the next (reuse) request */
3246     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3247     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3248 
3249     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3250     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3251 
3252     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3253     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3254   }
3255   PetscFunctionReturn(0);
3256 }
3257 
3258 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3259 {
3260   PetscErrorCode ierr;
3261   IS             iscol_local=NULL,isrow_d;
3262   PetscInt       csize;
3263   PetscInt       n,i,j,start,end;
3264   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3265   MPI_Comm       comm;
3266 
3267   PetscFunctionBegin;
3268   /* If isrow has same processor distribution as mat,
3269      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3270   if (call == MAT_REUSE_MATRIX) {
3271     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3272     if (isrow_d) {
3273       sameRowDist  = PETSC_TRUE;
3274       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3275     } else {
3276       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3277       if (iscol_local) {
3278         sameRowDist  = PETSC_TRUE;
3279         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3280       }
3281     }
3282   } else {
3283     /* Check if isrow has same processor distribution as mat */
3284     sameDist[0]
= PETSC_FALSE; 3285 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3286 if (!n) { 3287 sameDist[0] = PETSC_TRUE; 3288 } else { 3289 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3290 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3291 if (i >= start && j < end) { 3292 sameDist[0] = PETSC_TRUE; 3293 } 3294 } 3295 3296 /* Check if iscol has same processor distribution as mat */ 3297 sameDist[1] = PETSC_FALSE; 3298 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3299 if (!n) { 3300 sameDist[1] = PETSC_TRUE; 3301 } else { 3302 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3303 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3304 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3305 } 3306 3307 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3308 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3309 sameRowDist = tsameDist[0]; 3310 } 3311 3312 if (sameRowDist) { 3313 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3314 /* isrow and iscol have same processor distribution as mat */ 3315 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3316 PetscFunctionReturn(0); 3317 } else { /* sameRowDist */ 3318 /* isrow has same processor distribution as mat */ 3319 if (call == MAT_INITIAL_MATRIX) { 3320 PetscBool sorted; 3321 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3322 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3323 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3324 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3325 3326 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3327 if (sorted) { 3328 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3329 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3330 PetscFunctionReturn(0); 3331 } 3332 } else { /* call == MAT_REUSE_MATRIX */ 3333 IS iscol_sub; 3334 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3335 if (iscol_sub) { 3336 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3337 PetscFunctionReturn(0); 3338 } 3339 } 3340 } 3341 } 3342 3343 /* General case: iscol -> iscol_local which has global size of iscol */ 3344 if (call == MAT_REUSE_MATRIX) { 3345 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3346 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3347 } else { 3348 if (!iscol_local) { 3349 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3350 } 3351 } 3352 3353 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3354 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3355 3356 if (call == MAT_INITIAL_MATRIX) { 3357 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3358 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3359 } 3360 PetscFunctionReturn(0); 3361 } 3362 3363 /*@C 3364 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3365 and "off-diagonal" part of the matrix in CSR format. 3366 3367 Collective on MPI_Comm 3368 3369 Input Parameters: 3370 + comm - MPI communicator 3371 . 
A - "diagonal" portion of matrix 3372 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3373 - garray - global index of B columns 3374 3375 Output Parameter: 3376 . mat - the matrix, with input A as its local diagonal matrix 3377 Level: advanced 3378 3379 Notes: 3380 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3381 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3382 3383 .seealso: MatCreateMPIAIJWithSplitArrays() 3384 @*/ 3385 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3386 { 3387 PetscErrorCode ierr; 3388 Mat_MPIAIJ *maij; 3389 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3390 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3391 PetscScalar *oa=b->a; 3392 Mat Bnew; 3393 PetscInt m,n,N; 3394 3395 PetscFunctionBegin; 3396 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3397 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3398 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3399 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3400 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3401 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3402 3403 /* Get global columns of mat */ 3404 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3405 3406 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3407 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3408 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3409 maij = (Mat_MPIAIJ*)(*mat)->data; 3410 3411 (*mat)->preallocated = PETSC_TRUE; 3412 3413 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3414 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3415 3416 /* Set A as diagonal portion of *mat */ 3417 maij->A = A; 3418 3419 nz = oi[m]; 3420 for (i=0; i<nz; i++) { 3421 col = oj[i]; 3422 oj[i] = garray[col]; 3423 } 3424 3425 /* Set Bnew as off-diagonal portion of *mat */ 3426 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3427 bnew = (Mat_SeqAIJ*)Bnew->data; 3428 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3429 maij->B = Bnew; 3430 3431 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3432 3433 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3434 b->free_a = PETSC_FALSE; 3435 b->free_ij = PETSC_FALSE; 3436 ierr = MatDestroy(&B);CHKERRQ(ierr); 3437 3438 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3439 bnew->free_a = PETSC_TRUE; 3440 bnew->free_ij = PETSC_TRUE; 3441 3442 /* condense columns of maij->B */ 3443 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3444 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3445 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3446 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3447 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3448 PetscFunctionReturn(0); 3449 } 3450 3451 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3452 
3453 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3454 { 3455 PetscErrorCode ierr; 3456 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3457 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3458 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3459 Mat M,Msub,B=a->B; 3460 MatScalar *aa; 3461 Mat_SeqAIJ *aij; 3462 PetscInt *garray = a->garray,*colsub,Ncols; 3463 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3464 IS iscol_sub,iscmap; 3465 const PetscInt *is_idx,*cmap; 3466 PetscBool allcolumns=PETSC_FALSE; 3467 MPI_Comm comm; 3468 3469 PetscFunctionBegin; 3470 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3471 3472 if (call == MAT_REUSE_MATRIX) { 3473 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3474 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3475 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3476 3477 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3478 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3479 3480 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3481 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3482 3483 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3484 3485 } else { /* call == MAT_INITIAL_MATRIX) */ 3486 PetscBool flg; 3487 3488 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3489 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3490 3491 /* (1) iscol -> nonscalable iscol_local */ 3492 /* Check for special case: each processor gets entire matrix columns */ 3493 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3494 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3495 if (allcolumns) { 3496 iscol_sub = iscol_local; 3497 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3498 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3499 3500 } else { 3501 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3502 PetscInt *idx,*cmap1,k; 3503 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3504 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3505 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3506 count = 0; 3507 k = 0; 3508 for (i=0; i<Ncols; i++) { 3509 j = is_idx[i]; 3510 if (j >= cstart && j < cend) { 3511 /* diagonal part of mat */ 3512 idx[count] = j; 3513 cmap1[count++] = i; /* column index in submat */ 3514 } else if (Bn) { 3515 /* off-diagonal part of mat */ 3516 if (j == garray[k]) { 3517 idx[count] = j; 3518 cmap1[count++] = i; /* column index in submat */ 3519 } else if (j > garray[k]) { 3520 while (j > garray[k] && k < Bn-1) k++; 3521 if (j == garray[k]) { 3522 idx[count] = j; 3523 cmap1[count++] = i; /* column index in submat */ 3524 } 3525 } 3526 } 3527 } 3528 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3529 3530 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3531 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3532 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3533 3534 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3535 } 3536 3537 /* (3) Create sequential Msub */ 3538 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3539 } 3540 3541 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3542 aij = (Mat_SeqAIJ*)(Msub)->data; 3543 ii = aij->i; 3544 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3545 3546 /* 3547 m - number of local rows 3548 Ncols - number of columns (same on all processors) 3549 rstart - first row in new global matrix generated 3550 */ 3551 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3552 3553 if (call == MAT_INITIAL_MATRIX) { 3554 /* (4) Create parallel newmat */ 3555 PetscMPIInt rank,size; 3556 PetscInt csize; 3557 3558 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3559 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3560 3561 /* 3562 Determine the number of non-zeros in the diagonal and off-diagonal 3563 portions of the matrix in order to do correct preallocation 3564 */ 3565 3566 /* first get start and end of "diagonal" columns */ 3567 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3568 if (csize == PETSC_DECIDE) { 3569 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3570 if (mglobal == Ncols) { /* square matrix */ 3571 nlocal = m; 3572 } else { 3573 nlocal = Ncols/size + ((Ncols % size) > rank); 3574 } 3575 } else { 3576 nlocal = csize; 3577 } 3578 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3579 rstart = rend - nlocal; 3580 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3581 3582 /* next, compute all the lengths */ 3583 jj = aij->j; 3584 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3585 olens = dlens + m; 3586 for (i=0; i<m; i++) { 3587 jend = ii[i+1] - ii[i]; 3588 olen = 0; 3589 dlen = 0; 3590 for (j=0; j<jend; j++) { 3591 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3592 else dlen++; 3593 jj++; 3594 } 3595 olens[i] = olen; 3596 dlens[i] = dlen; 3597 } 3598 3599 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3600 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3601 3602 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3603 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
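    /* match the type of mat and preallocate using the diagonal/off-diagonal row lengths computed above */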
3604     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3605     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3606     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3607     ierr = PetscFree(dlens);CHKERRQ(ierr);
3608 
3609   } else { /* call == MAT_REUSE_MATRIX */
3610     M = *newmat;
3611     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3612     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3613     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3614     /*
3615       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3616       rather than the slower MatSetValues().
3617     */
3618     M->was_assembled = PETSC_TRUE;
3619     M->assembled     = PETSC_FALSE;
3620   }
3621 
3622   /* (5) Set values of Msub to *newmat */
3623   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3624   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3625 
3626   jj = aij->j;
3627   aa = aij->a;
3628   for (i=0; i<m; i++) {
3629     row = rstart + i;
3630     nz  = ii[i+1] - ii[i];
3631     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3632     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3633     jj += nz; aa += nz;
3634   }
3635   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3636 
3637   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3638   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3639 
3640   ierr = PetscFree(colsub);CHKERRQ(ierr);
3641 
3642   /* save Msub, iscol_sub and iscmap on this process for the next (reuse) request */
3643   if (call == MAT_INITIAL_MATRIX) {
3644     *newmat = M;
3645     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3646     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3647 
3648     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3649     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3650 
3651     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3652     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3653 
3654     if (iscol_local) {
3655       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3656       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3657     }
3658   }
3659   PetscFunctionReturn(0);
3660 }
3661 
3662 /*
3663   Not scalable because it makes two copies of the submatrix: first a sequential SeqAIJ copy
3664   on each process, which is then concatenated into the final parallel matrix.
3665   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3666 
3667   Note: This requires a sequential iscol with all indices.
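  The flow below follows that outline: gather a sequential copy of the requested rows and
  columns with MatCreateSubMatrices_MPIAIJ_SingleIS_Local(), preallocate the parallel result
  from the copy's row lengths, then insert the values row by row with MatSetValues_MPIAIJ().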
3668 */ 3669 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3670 { 3671 PetscErrorCode ierr; 3672 PetscMPIInt rank,size; 3673 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3674 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3675 Mat M,Mreuse; 3676 MatScalar *aa,*vwork; 3677 MPI_Comm comm; 3678 Mat_SeqAIJ *aij; 3679 PetscBool colflag,allcolumns=PETSC_FALSE; 3680 3681 PetscFunctionBegin; 3682 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3683 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3684 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3685 3686 /* Check for special case: each processor gets entire matrix columns */ 3687 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3688 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3689 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3690 3691 if (call == MAT_REUSE_MATRIX) { 3692 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3693 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3694 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3695 } else { 3696 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3697 } 3698 3699 /* 3700 m - number of local rows 3701 n - number of columns (same on all processors) 3702 rstart - first row in new global matrix generated 3703 */ 3704 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3705 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3706 if (call == MAT_INITIAL_MATRIX) { 3707 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3708 ii = aij->i; 3709 jj = aij->j; 3710 3711 /* 3712 Determine the number of non-zeros in the diagonal and off-diagonal 3713 portions of the matrix in order to do correct preallocation 3714 */ 3715 3716 /* first get start and end of "diagonal" columns */ 3717 if (csize == PETSC_DECIDE) { 3718 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3719 if (mglobal == n) { /* square matrix */ 3720 nlocal = m; 3721 } else { 3722 nlocal = n/size + ((n % size) > rank); 3723 } 3724 } else { 3725 nlocal = csize; 3726 } 3727 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3728 rstart = rend - nlocal; 3729 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3730 3731 /* next, compute all the lengths */ 3732 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3733 olens = dlens + m; 3734 for (i=0; i<m; i++) { 3735 jend = ii[i+1] - ii[i]; 3736 olen = 0; 3737 dlen = 0; 3738 for (j=0; j<jend; j++) { 3739 if (*jj < rstart || *jj >= rend) olen++; 3740 else dlen++; 3741 jj++; 3742 } 3743 olens[i] = olen; 3744 dlens[i] = dlen; 3745 } 3746 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3747 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3748 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3749 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3750 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3751 ierr = PetscFree(dlens);CHKERRQ(ierr); 3752 } else { 3753 PetscInt ml,nl; 3754 3755 M = *newmat; 3756 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3757 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3758 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3759 /* 3760 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3761 rather than the slower MatSetValues(). 3762 */ 3763 M->was_assembled = PETSC_TRUE; 3764 M->assembled = PETSC_FALSE; 3765 } 3766 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3767 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3768 ii = aij->i; 3769 jj = aij->j; 3770 aa = aij->a; 3771 for (i=0; i<m; i++) { 3772 row = rstart + i; 3773 nz = ii[i+1] - ii[i]; 3774 cwork = jj; jj += nz; 3775 vwork = aa; aa += nz; 3776 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3777 } 3778 3779 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3780 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3781 *newmat = M; 3782 3783 /* save submatrix used in processor for next request */ 3784 if (call == MAT_INITIAL_MATRIX) { 3785 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3786 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3787 } 3788 PetscFunctionReturn(0); 3789 } 3790 3791 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3792 { 3793 PetscInt m,cstart, cend,j,nnz,i,d; 3794 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3795 const PetscInt *JJ; 3796 PetscScalar *values; 3797 PetscErrorCode ierr; 3798 PetscBool nooffprocentries; 3799 3800 PetscFunctionBegin; 3801 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3802 3803 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3804 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3805 m = B->rmap->n; 3806 cstart = B->cmap->rstart; 3807 cend = B->cmap->rend; 3808 rstart = B->rmap->rstart; 3809 3810 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3811 3812 #if defined(PETSC_USE_DEBUG) 3813 for (i=0; i<m; i++) { 3814 nnz = Ii[i+1]- Ii[i]; 3815 JJ = J + Ii[i]; 3816 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3817 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3818 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3819 } 3820 #endif 3821 3822 for (i=0; i<m; i++) { 3823 nnz = Ii[i+1]- Ii[i]; 3824 JJ = J + Ii[i]; 3825 nnz_max = PetscMax(nnz_max,nnz); 3826 d = 0; 3827 for (j=0; j<nnz; j++) { 3828 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3829 } 3830 d_nnz[i] = d; 3831 o_nnz[i] = nnz - d; 3832 } 3833 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3834 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3835 3836 if (v) values = (PetscScalar*)v; 3837 else { 3838 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3839 } 3840 3841 for (i=0; i<m; i++) { 3842 ii = i + rstart; 3843 nnz = Ii[i+1]- Ii[i]; 3844 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3845   }
3846   nooffprocentries    = B->nooffprocentries;
3847   B->nooffprocentries = PETSC_TRUE;
3848   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3849   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3850   B->nooffprocentries = nooffprocentries;
3851 
3852   if (!v) {
3853     ierr = PetscFree(values);CHKERRQ(ierr);
3854   }
3855   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3856   PetscFunctionReturn(0);
3857 }
3858 
3859 /*@
3860    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3861    (the default parallel PETSc format).
3862 
3863    Collective on MPI_Comm
3864 
3865    Input Parameters:
3866 +  B - the matrix
3867 .  i - the indices into j for the start of each local row (starts with zero)
3868 .  j - the column indices for each local row (starts with zero)
3869 -  v - optional values in the matrix
3870 
3871    Level: developer
3872 
3873    Notes:
3874        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3875      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3876      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3877 
3878        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3879 
3880        The format used for the sparse matrix input is equivalent to a
3881     row-major ordering, i.e., for the following matrix, the input data expected is
3882     as shown
3883 
3884 $        1 0 0
3885 $        2 0 3     P0
3886 $       -------
3887 $        4 5 6     P1
3888 $
3889 $     Process0 [P0]: rows_owned=[0,1]
3890 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3891 $        j =  {0,0,2}  [size = 3]
3892 $        v =  {1,2,3}  [size = 3]
3893 $
3894 $     Process1 [P1]: rows_owned=[2]
3895 $        i =  {0,3}    [size = nrow+1  = 1+1]
3896 $        j =  {0,1,2}  [size = 3]
3897 $        v =  {4,5,6}  [size = 3]
3898 
3899 .keywords: matrix, aij, compressed row, sparse, parallel
3900 
3901 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3902           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3903 @*/
3904 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3905 {
3906   PetscErrorCode ierr;
3907 
3908   PetscFunctionBegin;
3909   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3910   PetscFunctionReturn(0);
3911 }
3912 
3913 /*@C
3914    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3915    (the default parallel PETSc format). For good matrix assembly performance
3916    the user should preallocate the matrix storage by setting the parameters
3917    d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3918    performance can be increased by more than a factor of 50.
3919 
3920    Collective on MPI_Comm
3921 
3922    Input Parameters:
3923 +  B - the matrix
3924 .  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3925            (same value is used for all local rows)
3926 .  d_nnz - array containing the number of nonzeros in the various rows of the
3927            DIAGONAL portion of the local submatrix (possibly different for each row)
3928            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3929            The size of this array is equal to the number of local rows, i.e., 'm'.
3930            For matrices that will be factored, you must leave room for (and set)
3931            the diagonal entry even if it is zero.
3932 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3933            submatrix (same value is used for all local rows).
3934 -  o_nnz - array containing the number of nonzeros in the various rows of the
3935            OFF-DIAGONAL portion of the local submatrix (possibly different for
3936            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3937            structure. The size of this array is equal to the number
3938            of local rows, i.e., 'm'.
3939 
3940    If the *_nnz parameter is given then the *_nz parameter is ignored
3941 
3942    The AIJ format (also called the Yale sparse matrix format or
3943    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3944    storage.  The stored row and column indices begin with zero.
3945    See Users-Manual: ch_mat for details.
3946 
3947    The parallel matrix is partitioned such that the first m0 rows belong to
3948    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3949    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3950 
3951    The DIAGONAL portion of the local submatrix of a processor can be defined
3952    as the submatrix which is obtained by extracting the part corresponding to
3953    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3954    first row that belongs to the processor, r2 is the last row belonging to
3955    this processor, and c1-c2 is the range of indices of the local part of a
3956    vector suitable for applying the matrix to. This is an mxn matrix. In the
3957    common case of a square matrix, the row and column ranges are the same and
3958    the DIAGONAL part is also square. The remaining portion of the local
3959    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3960 
3961    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3962 
3963    You can call MatGetInfo() to get information on how effective the preallocation was;
3964    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded.
3965    You can also run with the option -info and look for messages with the string
3966    malloc in them to see if additional memory allocation was needed.
3967 
3968    Example usage:
3969 
3970    Consider the following 8x8 matrix with 34 non-zero values, that is
3971    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3972    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3973    as follows:
3974 
3975 .vb
3976             1  2  0  |  0  3  0  |  0  4
3977     Proc0   0  5  6  |  7  0  0  |  8  0
3978             9  0 10  | 11  0  0  | 12  0
3979     -------------------------------------
3980            13  0 14  | 15 16 17  |  0  0
3981     Proc1   0 18  0  | 19 20 21  |  0  0
3982             0  0  0  | 22 23  0  | 24  0
3983     -------------------------------------
3984     Proc2  25 26 27  |  0  0 28  | 29  0
3985            30  0  0  | 31 32 33  |  0 34
3986 .ve
3987 
3988    This can be represented as a collection of submatrices as:
3989 
3990 .vb
3991       A B C
3992       D E F
3993       G H I
3994 .ve
3995 
3996    Where the submatrices A,B,C are owned by proc0, D,E,F are
3997    owned by proc1, G,H,I are owned by proc2.
3998 
3999    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4000    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4001    The 'M','N' parameters are 8,8, and have the same values on all procs.
4002 
4003    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4004    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4005    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4006    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4007    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4008    matrix, and [DF] as another SeqAIJ matrix.
4009 
4010    When d_nz, o_nz parameters are specified, d_nz storage elements are
4011    allocated for every row of the local diagonal submatrix, and o_nz
4012    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4013    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4014    the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4015    In this case, the values of d_nz,o_nz are:
4016 .vb
4017      proc0 : dnz = 2, o_nz = 2
4018      proc1 : dnz = 3, o_nz = 2
4019      proc2 : dnz = 1, o_nz = 4
4020 .ve
4021    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4022    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4023    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4024    34 values.
4025 
4026    When d_nnz, o_nnz parameters are specified, the storage is specified
4027    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4028    In the above case the values for d_nnz,o_nnz are:
4029 .vb
4030      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4031      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4032      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4033 .ve
4034    Here the space allocated is the sum of all the above values, i.e., 34, and
4035    hence pre-allocation is perfect.
4036 
4037    Level: intermediate
4038 
4039 .keywords: matrix, aij, compressed row, sparse, parallel
4040 
4041 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4042           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4043 @*/
4044 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4045 {
4046   PetscErrorCode ierr;
4047 
4048   PetscFunctionBegin;
4049   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4050   PetscValidType(B,1);
4051   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4052   PetscFunctionReturn(0);
4053 }
4054 
4055 /*@
4056    MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4057    in standard CSR format.
4058 
4059    Collective on MPI_Comm
4060 
4061    Input Parameters:
4062 +  comm - MPI communicator
4063 .  m - number of local rows (Cannot be PETSC_DECIDE)
4064 .  n - This value should be the same as the local size used in creating the
4065        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4066        calculated if N is given) For square matrices n is almost always m.
4067 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4068 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4069 .  i - row indices
4070 .  j - column indices
4071 -  a - matrix values
4072 
4073    Output Parameter:
4074 .  mat - the matrix
4075 
4076    Level: intermediate
4077 
4078    Notes:
4079        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4080      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4081      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4082 
4083        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4084 
4085        The format used for the sparse matrix input is equivalent to a
4086     row-major ordering, i.e., for the following matrix, the input data expected is
4087     as shown
4088 
4089 $        1 0 0
4090 $        2 0 3     P0
4091 $       -------
4092 $        4 5 6     P1
4093 $
4094 $     Process0 [P0]: rows_owned=[0,1]
4095 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4096 $        j =  {0,0,2}  [size = 3]
4097 $        v =  {1,2,3}  [size = 3]
4098 $
4099 $     Process1 [P1]: rows_owned=[2]
4100 $        i =  {0,3}    [size = nrow+1  = 1+1]
4101 $        j =  {0,1,2}  [size = 3]
4102 $        v =  {4,5,6}  [size = 3]
4103 
4104 .keywords: matrix, aij, compressed row, sparse, parallel
4105 
4106 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4107           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4108 @*/
4109 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4110 {
4111   PetscErrorCode ierr;
4112 
4113   PetscFunctionBegin;
4114   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4115   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4116   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4117   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4118   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4119   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4120   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4121   PetscFunctionReturn(0);
4122 }
4123 
4124 /*@C
4125    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4126    (the default parallel PETSc format).  For good matrix assembly performance
4127    the user should preallocate the matrix storage by setting the parameters
4128    d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4129    performance can be increased by more than a factor of 50.
4130 
4131    Collective on MPI_Comm
4132 
4133    Input Parameters:
4134 +  comm - MPI communicator
4135 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4136        This value should be the same as the local size used in creating the
4137        y vector for the matrix-vector product y = Ax.
4138 .  n - This value should be the same as the local size used in creating the
4139        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4140        calculated if N is given) For square matrices n is almost always m.
4141 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4142 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4143 .  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4144            (same value is used for all local rows)
4145 .  d_nnz - array containing the number of nonzeros in the various rows of the
4146            DIAGONAL portion of the local submatrix (possibly different for each row)
4147            or NULL, if d_nz is used to specify the nonzero structure.
4148            The size of this array is equal to the number of local rows, i.e., 'm'.
4149 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4150            submatrix (same value is used for all local rows).
4151 -  o_nnz - array containing the number of nonzeros in the various rows of the
4152            OFF-DIAGONAL portion of the local submatrix (possibly different for
4153            each row) or NULL, if o_nz is used to specify the nonzero
4154            structure. The size of this array is equal to the number
4155            of local rows, i.e., 'm'.
4156 
4157    Output Parameter:
4158 .  A - the matrix
4159 
4160    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4161    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4162    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4163 
4164    Notes:
4165    If the *_nnz parameter is given then the *_nz parameter is ignored
4166 
4167    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4168    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4169    storage requirements for this matrix.
4170 
4171    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4172    processor then it must be used on all processors that share the object for
4173    that argument.
4174 
4175    The user MUST specify either the local or global matrix dimensions
4176    (possibly both).
4177 
4178    The parallel matrix is partitioned across processors such that the
4179    first m0 rows belong to process 0, the next m1 rows belong to
4180    process 1, the next m2 rows belong to process 2, etc., where
4181    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4182    values corresponding to an [m x N] submatrix.
4183 
4184    The columns are logically partitioned with the n0 columns belonging
4185    to the 0th partition, the next n1 columns belonging to the next
4186    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4187 
4188    The DIAGONAL portion of the local submatrix on any given processor
4189    is the submatrix corresponding to the rows and columns m,n
4190    corresponding to the given processor, i.e., the diagonal matrix on
4191    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4192    etc. The remaining portion of the local submatrix [m x (N-n)]
4193    constitutes the OFF-DIAGONAL portion. The example below better
4194    illustrates this concept.
4195 
4196    For a square global matrix we define each processor's diagonal portion
4197    to be its local rows and the corresponding columns (a square submatrix);
4198    each processor's off-diagonal portion encompasses the remainder of the
4199    local matrix (a rectangular submatrix).
4200 
4201    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4202 
4203    When calling this routine with a single process communicator, a matrix of
4204    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4205    type of communicator, use the construction mechanism
4210 $     MatCreate(...,&A);
4211 $     MatSetType(A,MATMPIAIJ);
4212 $     MatSetSizes(A, m,n,M,N);
4213 $     MatMPIAIJSetPreallocation(A,...);
4214 
4215    By default, this format uses inodes (identical nodes) when possible.
4216    We search for consecutive rows with the same nonzero structure, thereby
4217    reusing matrix information to achieve increased efficiency.
4218 
4219    Options Database Keys:
4220 +  -mat_no_inode  - Do not use inodes
4221 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4222 
4225    Example usage:
4226 
4227    Consider the following 8x8 matrix with 34 non-zero values, that is
4228    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4229    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4230    as follows:
4231 
4232 .vb
4233             1  2  0  |  0  3  0  |  0  4
4234    Proc0    0  5  6  |  7  0  0  |  8  0
4235             9  0 10  | 11  0  0  | 12  0
4236    -------------------------------------
4237            13  0 14  | 15 16 17  |  0  0
4238    Proc1    0 18  0  | 19 20 21  |  0  0
4239             0  0  0  | 22 23  0  | 24  0
4240    -------------------------------------
4241    Proc2   25 26 27  |  0  0 28  | 29  0
4242            30  0  0  | 31 32 33  |  0 34
4243 .ve
4244 
4245    This can be represented as a collection of submatrices as:
4246 
4247 .vb
4248       A B C
4249       D E F
4250       G H I
4251 .ve
4252 
4253    Where the submatrices A,B,C are owned by proc0, D,E,F are
4254    owned by proc1, G,H,I are owned by proc2.
4255 
4256    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4257    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4258    The 'M','N' parameters are 8,8, and have the same values on all procs.
4259 
4260    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4261    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4262    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4263    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4264    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4265    matrix, and [DF] as another SeqAIJ matrix.
4266 
4267    When d_nz, o_nz parameters are specified, d_nz storage elements are
4268    allocated for every row of the local diagonal submatrix, and o_nz
4269    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4270    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4271    the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4272    In this case, the values of d_nz,o_nz are:
4273 .vb
4274      proc0 : dnz = 2, o_nz = 2
4275      proc1 : dnz = 3, o_nz = 2
4276      proc2 : dnz = 1, o_nz = 4
4277 .ve
4278    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4279    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4280    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4281    34 values.
4282 
4283    When d_nnz, o_nnz parameters are specified, the storage is specified
4284    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4285    In the above case the values for d_nnz,o_nnz are:
4286 .vb
4287      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4288      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4289      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4290 .ve
4291    Here the space allocated is the sum of all the above values, i.e., 34, and
4292    hence pre-allocation is perfect.
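   For instance, proc0 in the example above could be created as follows (an
   illustrative sketch; in practice the nnz arrays are computed from the
   application's connectivity rather than written by hand):

.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};    d_nnz,o_nnz for proc0's three rows
      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve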
4293 4294 Level: intermediate 4295 4296 .keywords: matrix, aij, compressed row, sparse, parallel 4297 4298 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4299 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4300 @*/ 4301 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4302 { 4303 PetscErrorCode ierr; 4304 PetscMPIInt size; 4305 4306 PetscFunctionBegin; 4307 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4308 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4309 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4310 if (size > 1) { 4311 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4312 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4313 } else { 4314 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4315 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4316 } 4317 PetscFunctionReturn(0); 4318 } 4319 4320 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4321 { 4322 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4323 PetscBool flg; 4324 PetscErrorCode ierr; 4325 4326 PetscFunctionBegin; 4327 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4328 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4329 if (Ad) *Ad = a->A; 4330 if (Ao) *Ao = a->B; 4331 if (colmap) *colmap = a->garray; 4332 PetscFunctionReturn(0); 4333 } 4334 4335 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4336 { 4337 PetscErrorCode ierr; 4338 PetscInt m,N,i,rstart,nnz,Ii; 4339 PetscInt *indx; 4340 PetscScalar *values; 4341 4342 PetscFunctionBegin; 4343 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4344 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4345 PetscInt *dnz,*onz,sum,bs,cbs; 4346 4347 if (n == PETSC_DECIDE) { 4348 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4349 } 4350 /* Check sum(n) = N */ 4351 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4352 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4353 4354 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4355 rstart -= m; 4356 4357 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4358 for (i=0; i<m; i++) { 4359 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4360 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4361 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4362 } 4363 4364 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4365 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4366 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4367 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4368 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4369 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4370 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4371 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4372 } 4373 4374 /* numeric phase */ 4375 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4376 for (i=0; i<m; i++) { 4377 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4378 Ii = i + rstart; 4379 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4380 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4381 } 4382 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4383 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4384 PetscFunctionReturn(0); 4385 } 4386 4387 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4388 { 4389 PetscErrorCode ierr; 4390 PetscMPIInt rank; 4391 PetscInt m,N,i,rstart,nnz; 4392 size_t len; 4393 const PetscInt *indx; 4394 PetscViewer out; 4395 char *name; 4396 Mat B; 4397 const PetscScalar *values; 4398 4399 PetscFunctionBegin; 4400 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4401 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4402 /* Should this be the type of the diagonal block of A? */ 4403 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4404 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4405 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4406 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4407 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4408 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4409 for (i=0; i<m; i++) { 4410 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4411 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4412 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4413 } 4414 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4415 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4416 4417 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4418 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4419 ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix appended below */ 4420 sprintf(name,"%s.%d",outfile,rank); 4421 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4422 ierr = PetscFree(name);CHKERRQ(ierr); 4423 ierr = MatView(B,out);CHKERRQ(ierr); 4424 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4425 ierr = MatDestroy(&B);CHKERRQ(ierr); 4426 PetscFunctionReturn(0); 4427 } 4428 4429 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4430 { 4431 PetscErrorCode ierr; 4432 Mat_Merge_SeqsToMPI *merge; 4433 PetscContainer container; 4434 4435 PetscFunctionBegin; 4436 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4437 if (container) { 4438 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4439 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4440 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4441 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4442 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4443 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4444 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4445 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4446 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4447 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4448 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4449 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4450 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4451 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4452 ierr = PetscFree(merge);CHKERRQ(ierr); 4453 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4454 } 4455 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4456 PetscFunctionReturn(0); 4457 } 4458 4459 #include <../src/mat/utils/freespace.h> 4460 #include <petscbt.h> 4461 4462 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4463 { 4464 PetscErrorCode ierr; 4465 MPI_Comm comm; 4466 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4467 PetscMPIInt
size,rank,taga,*len_s; 4468 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4469 PetscInt proc,m; 4470 PetscInt **buf_ri,**buf_rj; 4471 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4472 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4473 MPI_Request *s_waits,*r_waits; 4474 MPI_Status *status; 4475 MatScalar *aa=a->a; 4476 MatScalar **abuf_r,*ba_i; 4477 Mat_Merge_SeqsToMPI *merge; 4478 PetscContainer container; 4479 4480 PetscFunctionBegin; 4481 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4482 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4483 4484 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4485 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4486 4487 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4488 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4489 4490 bi = merge->bi; 4491 bj = merge->bj; 4492 buf_ri = merge->buf_ri; 4493 buf_rj = merge->buf_rj; 4494 4495 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4496 owners = merge->rowmap->range; 4497 len_s = merge->len_s; 4498 4499 /* send and recv matrix values */ 4500 /*-----------------------------*/ 4501 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4502 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4503 4504 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4505 for (proc=0,k=0; proc<size; proc++) { 4506 if (!len_s[proc]) continue; 4507 i = owners[proc]; 4508 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4509 k++; 4510 } 4511 4512 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4513 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4514 ierr = PetscFree(status);CHKERRQ(ierr); 4515 4516 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4517 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4518 4519 /* insert mat values of mpimat */ 4520 /*----------------------------*/ 4521 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4522 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4523 4524 for (k=0; k<merge->nrecv; k++) { 4525 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4526 nrows = *(buf_ri_k[k]); 4527 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4528 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4529 } 4530 4531 /* set values of ba */ 4532 m = merge->rowmap->n; 4533 for (i=0; i<m; i++) { 4534 arow = owners[rank] + i; 4535 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4536 bnzi = bi[i+1] - bi[i]; 4537 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4538 4539 /* add local non-zero vals of this proc's seqmat into ba */ 4540 anzi = ai[arow+1] - ai[arow]; 4541 aj = a->j + ai[arow]; 4542 aa = a->a + ai[arow]; 4543 nextaj = 0; 4544 for (j=0; nextaj<anzi; j++) { 4545 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4546 ba_i[j] += aa[nextaj++]; 4547 } 4548 } 4549 4550 /* add received vals into ba */ 4551 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4552 /* i-th row */ 4553 if (i == *nextrow[k]) { 4554 anzi = *(nextai[k]+1) - *nextai[k]; 4555 aj = buf_rj[k] + *(nextai[k]); 4556 aa = abuf_r[k] + *(nextai[k]); 4557 nextaj = 0; 4558 for (j=0; nextaj<anzi; j++) { 4559 if (*(bj_i + j) == aj[nextaj]) { /* bcol
== acol */ 4560 ba_i[j] += aa[nextaj++]; 4561 } 4562 } 4563 nextrow[k]++; nextai[k]++; 4564 } 4565 } 4566 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4567 } 4568 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4569 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4570 4571 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4572 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4573 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4574 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4575 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4576 PetscFunctionReturn(0); 4577 } 4578 4579 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4580 { 4581 PetscErrorCode ierr; 4582 Mat B_mpi; 4583 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4584 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4585 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4586 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4587 PetscInt len,proc,*dnz,*onz,bs,cbs; 4588 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4589 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4590 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4591 MPI_Status *status; 4592 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4593 PetscBT lnkbt; 4594 Mat_Merge_SeqsToMPI *merge; 4595 PetscContainer container; 4596 4597 PetscFunctionBegin; 4598 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4599 4600 /* make sure it is a PETSc comm */ 4601 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4602 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4603 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4604 4605 ierr = PetscNew(&merge);CHKERRQ(ierr); 4606 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4607 4608 /* determine row ownership */ 4609 /*---------------------------------------------------------*/ 4610 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4611 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4612 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4613 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4614 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4615 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4616 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4617 4618 m = merge->rowmap->n; 4619 owners = merge->rowmap->range; 4620 4621 /* determine the number of messages to send, their lengths */ 4622 /*---------------------------------------------------------*/ 4623 len_s = merge->len_s; 4624 4625 len = 0; /* length of buf_si[] */ 4626 merge->nsend = 0; 4627 for (proc=0; proc<size; proc++) { 4628 len_si[proc] = 0; 4629 if (proc == rank) { 4630 len_s[proc] = 0; 4631 } else { 4632 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4633 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */ 4634 } 4635 if (len_s[proc]) { 4636 merge->nsend++; 4637 nrows = 0; 4638 for (i=owners[proc]; i<owners[proc+1]; i++) { 4639 if (ai[i+1] > ai[i]) nrows++; 4640 } 4641 len_si[proc] = 2*(nrows+1); 4642 len += len_si[proc]; 4643 } 4644 } 4645 4646 /* determine the number and length of messages to receive for ij-structure */ 4647 /*-------------------------------------------------------------------------*/ 4648 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4649 ierr =
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4650 4651 /* post the Irecv of j-structure */ 4652 /*-------------------------------*/ 4653 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4654 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4655 4656 /* post the Isend of j-structure */ 4657 /*--------------------------------*/ 4658 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4659 4660 for (proc=0, k=0; proc<size; proc++) { 4661 if (!len_s[proc]) continue; 4662 i = owners[proc]; 4663 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4664 k++; 4665 } 4666 4667 /* receives and sends of j-structure are complete */ 4668 /*------------------------------------------------*/ 4669 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4670 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4671 4672 /* send and recv i-structure */ 4673 /*---------------------------*/ 4674 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4675 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4676 4677 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4678 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4679 for (proc=0,k=0; proc<size; proc++) { 4680 if (!len_s[proc]) continue; 4681 /* form outgoing message for i-structure: 4682 buf_si[0]: nrows to be sent 4683 [1:nrows]: row index (local to the receiving process) 4684 [nrows+1:2*nrows+1]: i-structure index 4685 */ 4686 /*-------------------------------------------*/ 4687 nrows = len_si[proc]/2 - 1; 4688 buf_si_i = buf_si + nrows+1; 4689 buf_si[0] = nrows; 4690 buf_si_i[0] = 0; 4691 nrows = 0; 4692 for (i=owners[proc]; i<owners[proc+1]; i++) { 4693 anzi = ai[i+1] - ai[i]; 4694 if (anzi) { 4695 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4696 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4697 nrows++; 4698 } 4699 } 4700 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4701 k++; 4702 buf_si += len_si[proc]; 4703 } 4704 4705 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4706 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4707 4708 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4709 for (i=0; i<merge->nrecv; i++) { 4710 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4711 } 4712 4713 ierr = PetscFree(len_si);CHKERRQ(ierr); 4714 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4715 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4716 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4717 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4718 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4719 ierr = PetscFree(status);CHKERRQ(ierr); 4720 4721 /* compute a local seq matrix in each processor */ 4722 /*----------------------------------------------*/ 4723 /* allocate bi array and free space for accumulating nonzero column info */ 4724 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4725 bi[0] = 0; 4726 4727 /* create and initialize a linked list */ 4728 nlnk = N+1; 4729 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4730 4731 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4732 len = ai[owners[rank+1]] -
ai[owners[rank]]; 4733 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4734 4735 current_space = free_space; 4736 4737 /* determine symbolic info for each local row */ 4738 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4739 4740 for (k=0; k<merge->nrecv; k++) { 4741 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4742 nrows = *buf_ri_k[k]; 4743 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4744 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4745 } 4746 4747 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4748 len = 0; 4749 for (i=0; i<m; i++) { 4750 bnzi = 0; 4751 /* add local non-zero cols of this proc's seqmat into lnk */ 4752 arow = owners[rank] + i; 4753 anzi = ai[arow+1] - ai[arow]; 4754 aj = a->j + ai[arow]; 4755 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4756 bnzi += nlnk; 4757 /* add received col data into lnk */ 4758 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4759 if (i == *nextrow[k]) { /* i-th row */ 4760 anzi = *(nextai[k]+1) - *nextai[k]; 4761 aj = buf_rj[k] + *nextai[k]; 4762 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4763 bnzi += nlnk; 4764 nextrow[k]++; nextai[k]++; 4765 } 4766 } 4767 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4768 4769 /* if free space is not available, make more free space */ 4770 if (current_space->local_remaining<bnzi) { 4771 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4772 nspacedouble++; 4773 } 4774 /* copy data into free space, then initialize lnk */ 4775 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4776 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4777 4778 current_space->array += bnzi; 4779 current_space->local_used += bnzi; 4780 current_space->local_remaining -= bnzi; 4781 4782 bi[i+1] = bi[i] + bnzi; 4783 } 4784 4785 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4786 4787 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4788 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4789 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4790 4791 /* create symbolic parallel matrix B_mpi */ 4792 /*---------------------------------------*/ 4793 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4794 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4795 if (n==PETSC_DECIDE) { 4796 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4797 } else { 4798 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4799 } 4800 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4801 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4802 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4803 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4804 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4805 4806 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4807 B_mpi->assembled = PETSC_FALSE; 4808 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4809 merge->bi = bi; 4810 merge->bj = bj; 4811 merge->buf_ri = buf_ri; 4812 merge->buf_rj = buf_rj; 4813 merge->coi = NULL; 4814 merge->coj = NULL; 4815 merge->owners_co = NULL; 4816 4817 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4818 4819 /* attach the
supporting struct to B_mpi for reuse */ 4820 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4821 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4822 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4823 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4824 *mpimat = B_mpi; 4825 4826 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4827 PetscFunctionReturn(0); 4828 } 4829 4830 /*@C 4831 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4832 matrices from each processor 4833 4834 Collective on MPI_Comm 4835 4836 Input Parameters: 4837 + comm - the communicator the parallel matrix will live on 4838 . seqmat - the input sequential matrix, one per process 4839 . m - number of local rows (or PETSC_DECIDE) 4840 . n - number of local columns (or PETSC_DECIDE) 4841 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4842 4843 Output Parameter: 4844 . mpimat - the parallel matrix generated 4845 4846 Level: advanced 4847 4848 Notes: 4849 The dimensions of the sequential matrix in each processor MUST be the same. 4850 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4851 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4852 @*/ 4853 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4854 { 4855 PetscErrorCode ierr; 4856 PetscMPIInt size; 4857 4858 PetscFunctionBegin; 4859 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4860 if (size == 1) { 4861 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4862 if (scall == MAT_INITIAL_MATRIX) { 4863 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4864 } else { 4865 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4866 } 4867 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4868 PetscFunctionReturn(0); 4869 } 4870 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4871 if (scall == MAT_INITIAL_MATRIX) { 4872 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4873 } 4874 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4875 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4876 PetscFunctionReturn(0); 4877 } 4878 4879 /*@ 4880 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4881 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4882 with MatGetSize(). 4883 4884 Not Collective 4885 4886 Input Parameters: 4887 + A - the matrix 4888 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4889 4890 Output Parameter: 4891 . A_loc - the local sequential matrix generated 4892 4893 Level: developer 4894 4895 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4896 4897 @*/ 4898 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4899 { 4900 PetscErrorCode ierr; 4901 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4902 Mat_SeqAIJ *mat,*a,*b; 4903 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4904 MatScalar *aa,*ba,*cam; 4905 PetscScalar *ca; 4906 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4907 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4908 PetscBool match; 4909 MPI_Comm comm; 4910 PetscMPIInt size; 4911 4912 PetscFunctionBegin; 4913 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4914 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4915 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4916 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4917 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4918 4919 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4920 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4921 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4922 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4923 aa = a->a; ba = b->a; 4924 if (scall == MAT_INITIAL_MATRIX) { 4925 if (size == 1) { 4926 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4927 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); 4928 } 4929 4930 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4931 ci[0] = 0; 4932 for (i=0; i<am; i++) { 4933 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4934 } 4935 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4936 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4937 k = 0; 4938 for (i=0; i<am; i++) { 4939 ncols_o = bi[i+1] - bi[i]; 4940 ncols_d = ai[i+1] - ai[i]; 4941 /* off-diagonal portion of A */ 4942 for (jo=0; jo<ncols_o; jo++) { 4943 col = cmap[*bj]; 4944 if (col >= cstart) break; 4945 cj[k] = col; bj++; 4946 ca[k++] = *ba++; 4947 } 4948 /* diagonal portion of A */ 4949 for (j=0; j<ncols_d; j++) { 4950 cj[k] = cstart + *aj++; 4951 ca[k++] = *aa++; 4952 } 4953 /* off-diagonal portion of A */ 4954 for (j=jo; j<ncols_o; j++) { 4955 cj[k] = cmap[*bj++]; 4956 ca[k++] = *ba++; 4957 } 4958 } 4959 /* put together the new matrix */ 4960 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4961 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4962 /* Since these are PETSc arrays, change flags to free them as necessary.
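The free_a and free_ij flags below are consulted by the SeqAIJ destroy routine when deciding whether to free the a, i, and j arrays.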
*/ 4963 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4964 mat->free_a = PETSC_TRUE; 4965 mat->free_ij = PETSC_TRUE; 4966 mat->nonew = 0; 4967 } else if (scall == MAT_REUSE_MATRIX) { 4968 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4969 ci = mat->i; cj = mat->j; cam = mat->a; 4970 for (i=0; i<am; i++) { 4971 /* off-diagonal portion of A */ 4972 ncols_o = bi[i+1] - bi[i]; 4973 for (jo=0; jo<ncols_o; jo++) { 4974 col = cmap[*bj]; 4975 if (col >= cstart) break; 4976 *cam++ = *ba++; bj++; 4977 } 4978 /* diagonal portion of A */ 4979 ncols_d = ai[i+1] - ai[i]; 4980 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4981 /* off-diagonal portion of A */ 4982 for (j=jo; j<ncols_o; j++) { 4983 *cam++ = *ba++; bj++; 4984 } 4985 } 4986 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4987 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4988 PetscFunctionReturn(0); 4989 } 4990 4991 /*@C 4992 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4993 4994 Not Collective 4995 4996 Input Parameters: 4997 + A - the matrix 4998 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4999 - row, col - index sets of rows and columns to extract (or NULL) 5000 5001 Output Parameter: 5002 . A_loc - the local sequential matrix generated 5003 5004 Level: developer 5005 5006 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5007 5008 @*/ 5009 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5010 { 5011 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5012 PetscErrorCode ierr; 5013 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5014 IS isrowa,iscola; 5015 Mat *aloc; 5016 PetscBool match; 5017 5018 PetscFunctionBegin; 5019 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5020 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5021 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5022 if (!row) { 5023 start = A->rmap->rstart; end = A->rmap->rend; 5024 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5025 } else { 5026 isrowa = *row; 5027 } 5028 if (!col) { 5029 start = A->cmap->rstart; 5030 cmap = a->garray; 5031 nzA = a->A->cmap->n; 5032 nzB = a->B->cmap->n; 5033 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5034 ncols = 0; 5035 for (i=0; i<nzB; i++) { 5036 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5037 else break; 5038 } 5039 imark = i; 5040 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5041 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5042 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5043 } else { 5044 iscola = *col; 5045 } 5046 if (scall != MAT_INITIAL_MATRIX) { 5047 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5048 aloc[0] = *A_loc; 5049 } 5050 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5051 *A_loc = aloc[0]; 5052 ierr = PetscFree(aloc);CHKERRQ(ierr); 5053 if (!row) { 5054 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5055 } 5056 if (!col) { 5057 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5058 } 5059 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5060 PetscFunctionReturn(0); 5061 } 5062 5063 /*@C 5064 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that correspond to the nonzero columns of local A 5065 5066 Collective on Mat 5067 5068 Input Parameters: 5069 + A,B - the matrices in mpiaij format 5070 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5071 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5072 5073 Output Parameters: 5074 + rowb, colb - index sets of rows and columns of B to extract 5075 - B_seq - the sequential matrix generated 5076 5077 Level: developer 5078 5079 @*/ 5080 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5081 { 5082 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5083 PetscErrorCode ierr; 5084 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5085 IS isrowb,iscolb; 5086 Mat *bseq=NULL; 5087 5088 PetscFunctionBegin; 5089 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5090 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5091 } 5092 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5093 5094 if (scall == MAT_INITIAL_MATRIX) { 5095 start = A->cmap->rstart; 5096 cmap = a->garray; 5097 nzA = a->A->cmap->n; 5098 nzB = a->B->cmap->n; 5099 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5100 ncols = 0; 5101 for (i=0; i<nzB; i++) { /* row < local row index */ 5102 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5103 else break; 5104 } 5105 imark = i; 5106 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5107 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5108 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5109 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5110 } else { 5111 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5112 isrowb = *rowb; iscolb = *colb; 5113 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5114 bseq[0] = *B_seq; 5115 } 5116 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5117 *B_seq = bseq[0]; 5118 ierr = PetscFree(bseq);CHKERRQ(ierr); 5119 if (!rowb) { 5120 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5121 } else { 5122 *rowb = isrowb; 5123 } 5124 if (!colb) { 5125 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5126 } else { 5127 *colb = iscolb; 5128 } 5129 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5130 PetscFunctionReturn(0); 5131 } 5132 5133 /* 5134 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that correspond to the nonzero columns 5135 of the OFF-DIAGONAL portion of local A 5136 5137 Collective on Mat 5138 5139 Input Parameters: 5140 + A,B - the matrices in mpiaij format 5141 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5142 5143 Output Parameters: 5144 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5145 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5146 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5147 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5148 5149 Level: developer 5150 5151 */ 5152 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5153 { 5154 VecScatter_MPI_General *gen_to,*gen_from; 5155 PetscErrorCode ierr; 5156 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5157 Mat_SeqAIJ *b_oth; 5158 VecScatter ctx; 5159 MPI_Comm comm; 5160 PetscMPIInt *rprocs,*sprocs,tag,rank; 5161 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5162 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5163 PetscScalar *b_otha,*bufa,*bufA,*vals; 5164 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5165 MPI_Request *rwaits = NULL,*swaits = NULL; 5166 MPI_Status *sstatus,rstatus; 5167 PetscMPIInt jj,size; 5168 VecScatterType type; 5169 PetscBool mpi1; 5170 5171 PetscFunctionBegin; 5172 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5173 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5174 5175 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5176 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5177 } 5178 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5179 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5180 5181 if (size == 1) { 5182 if (startsj_s) *startsj_s = NULL; 5183 if (bufa_ptr) *bufa_ptr = NULL; 5184 *B_oth = NULL; 5185 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); 5186 } 5187 5188 ctx = a->Mvctx; 5189 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5190 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5191 if (!mpi1) { 5192 /* a->Mvctx is not of type MPI1, the only type implemented for these Mat-Mat ops, 5193 thus create a->Mvctx_mpi1 */ 5194 if (!a->Mvctx_mpi1) { 5195 a->Mvctx_mpi1_flg = PETSC_TRUE; 5196 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5197 } 5198 ctx = a->Mvctx_mpi1; 5199 } 5200 tag = ((PetscObject)ctx)->tag; 5201 5202 gen_to = (VecScatter_MPI_General*)ctx->todata; 5203 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5204 nrecvs = gen_from->n; 5205 nsends = gen_to->n; 5206 5207 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5208 srow = gen_to->indices; /* local row index to be sent */ 5209 sstarts = gen_to->starts; 5210 sprocs = gen_to->procs; 5211 sstatus = gen_to->sstatus; 5212 sbs = gen_to->bs; 5213 rstarts = gen_from->starts; 5214 rprocs = gen_from->procs; 5215 rbs = gen_from->bs; 5216 5217 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5218 if (scall == MAT_INITIAL_MATRIX) { 5219 /* i-array */ 5220 /*---------*/ 5221 /* post receives */ 5222 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5223 for (i=0; i<nrecvs; i++) { 5224 rowlen = rvalues + rstarts[i]*rbs; 5225 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5226 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5227 } 5228 5229 /* pack the outgoing message */ 5230 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5231 5232 sstartsj[0] = 0; 5233 rstartsj[0] = 0; 5234 len = 0; /* total length of j or a array to be sent */ 5235 k = 0; 5236 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5237 for (i=0; i<nsends; i++) { 5238 rowlen = svalues + sstarts[i]*sbs; 5239
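/* fill rowlen[] with the nonzero count of every row headed to process i before sending */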
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5240 for (j=0; j<nrows; j++) { 5241 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5242 for (l=0; l<sbs; l++) { 5243 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5244 5245 rowlen[j*sbs+l] = ncols; 5246 5247 len += ncols; 5248 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5249 } 5250 k++; 5251 } 5252 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5253 5254 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5255 } 5256 /* recvs and sends of i-array are completed */ 5257 i = nrecvs; 5258 while (i--) { 5259 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5260 } 5261 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5262 ierr = PetscFree(svalues);CHKERRQ(ierr); 5263 5264 /* allocate buffers for sending j and a arrays */ 5265 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5266 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5267 5268 /* create i-array of B_oth */ 5269 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5270 5271 b_othi[0] = 0; 5272 len = 0; /* total length of j or a array to be received */ 5273 k = 0; 5274 for (i=0; i<nrecvs; i++) { 5275 rowlen = rvalues + rstarts[i]*rbs; 5276 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5277 for (j=0; j<nrows; j++) { 5278 b_othi[k+1] = b_othi[k] + rowlen[j]; 5279 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5280 k++; 5281 } 5282 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5283 } 5284 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5285 5286 /* allocate space for j and a arrays of B_oth */ 5287 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5288 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5289 5290 /* j-array */ 5291 /*---------*/ 5292 /* post receives of j-array */ 5293 for (i=0; i<nrecvs; i++) { 5294 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5295 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5296 } 5297 5298 /* pack the outgoing message j-array */ 5299 k = 0; 5300 for (i=0; i<nsends; i++) { 5301 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5302 bufJ = bufj+sstartsj[i]; 5303 for (j=0; j<nrows; j++) { 5304 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5305 for (ll=0; ll<sbs; ll++) { 5306 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5307 for (l=0; l<ncols; l++) { 5308 *bufJ++ = cols[l]; 5309 } 5310 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5311 } 5312 } 5313 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5314 } 5315 5316 /* recvs and sends of j-array are completed */ 5317 i = nrecvs; 5318 while (i--) { 5319 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5320 } 5321 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5322 } else if (scall == MAT_REUSE_MATRIX) { 5323 sstartsj = *startsj_s; 5324 rstartsj = *startsj_r; 5325 bufa = *bufa_ptr; 5326 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5327 b_otha = b_oth->a; 5328 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unsupported MatReuse %d",(int)scall); 5329 5330 /* a-array */ 5331 /*---------*/ 5332 /* post receives of a-array */ 5333 for (i=0; i<nrecvs; i++) { 5334 nrows = rstartsj[i+1]-rstartsj[i]; /* length
of the msg received */ 5335 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5336 } 5337 5338 /* pack the outgoing message a-array */ 5339 k = 0; 5340 for (i=0; i<nsends; i++) { 5341 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5342 bufA = bufa+sstartsj[i]; 5343 for (j=0; j<nrows; j++) { 5344 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5345 for (ll=0; ll<sbs; ll++) { 5346 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5347 for (l=0; l<ncols; l++) { 5348 *bufA++ = vals[l]; 5349 } 5350 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5351 } 5352 } 5353 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5354 } 5355 /* recvs and sends of a-array are completed */ 5356 i = nrecvs; 5357 while (i--) { 5358 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5359 } 5360 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5361 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5362 5363 if (scall == MAT_INITIAL_MATRIX) { 5364 /* put together the new matrix */ 5365 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5366 5367 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5368 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5369 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5370 b_oth->free_a = PETSC_TRUE; 5371 b_oth->free_ij = PETSC_TRUE; 5372 b_oth->nonew = 0; 5373 5374 ierr = PetscFree(bufj);CHKERRQ(ierr); 5375 if (!startsj_s || !bufa_ptr) { 5376 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5377 ierr = PetscFree(bufa);CHKERRQ(ierr); 5378 } else { 5379 *startsj_s = sstartsj; 5380 *startsj_r = rstartsj; 5381 *bufa_ptr = bufa; 5382 } 5383 } 5384 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5385 PetscFunctionReturn(0); 5386 } 5387 5388 /*@C 5389 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5390 5391 Not Collective 5392 5393 Input Parameter: 5394 . A - The matrix in mpiaij format 5395 5396 Output Parameters: 5397 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5398 .
colmap - A map from global column index to local index into lvec 5399 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5400 5401 Level: developer 5402 5403 @*/ 5404 #if defined(PETSC_USE_CTABLE) 5405 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5406 #else 5407 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5408 #endif 5409 { 5410 Mat_MPIAIJ *a; 5411 5412 PetscFunctionBegin; 5413 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5414 PetscValidPointer(lvec, 2); 5415 PetscValidPointer(colmap, 3); 5416 PetscValidPointer(multScatter, 4); 5417 a = (Mat_MPIAIJ*) A->data; 5418 if (lvec) *lvec = a->lvec; 5419 if (colmap) *colmap = a->colmap; 5420 if (multScatter) *multScatter = a->Mvctx; 5421 PetscFunctionReturn(0); 5422 } 5423 5424 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5425 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5426 #if defined(PETSC_HAVE_MKL_SPARSE) 5427 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5428 #endif 5429 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5430 #if defined(PETSC_HAVE_ELEMENTAL) 5431 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5432 #endif 5433 #if defined(PETSC_HAVE_HYPRE) 5434 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5435 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5436 #endif 5437 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5438 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5439 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5440 5441 /* 5442 Computes (B'*A')' since computing B*A directly is untenable 5443 5444 n p p 5445 ( ) ( ) ( ) 5446 m ( A ) * n ( B ) = m ( C ) 5447 ( ) ( ) ( ) 5448 5449 */ 5450 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5451 { 5452 PetscErrorCode ierr; 5453 Mat At,Bt,Ct; 5454 5455 PetscFunctionBegin; 5456 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5457 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5458 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5459 ierr = MatDestroy(&At);CHKERRQ(ierr); 5460 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5461 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5462 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5463 PetscFunctionReturn(0); 5464 } 5465 5466 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5467 { 5468 PetscErrorCode ierr; 5469 PetscInt m=A->rmap->n,n=B->cmap->n; 5470 Mat Cmat; 5471 5472 PetscFunctionBegin; 5473 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n); 5474 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5475 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5476 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5477 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5478 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5479 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5480 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5481 5482 Cmat->ops->matmultnumeric =
MatMatMultNumeric_MPIDense_MPIAIJ; 5483 5484 *C = Cmat; 5485 PetscFunctionReturn(0); 5486 } 5487 5488 /* ----------------------------------------------------------------*/ 5489 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5490 { 5491 PetscErrorCode ierr; 5492 5493 PetscFunctionBegin; 5494 if (scall == MAT_INITIAL_MATRIX) { 5495 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5496 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5497 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5498 } 5499 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5500 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5501 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5502 PetscFunctionReturn(0); 5503 } 5504 5505 /*MC 5506 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5507 5508 Options Database Keys: 5509 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5510 5511 Level: beginner 5512 5513 .seealso: MatCreateAIJ() 5514 M*/ 5515 5516 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5517 { 5518 Mat_MPIAIJ *b; 5519 PetscErrorCode ierr; 5520 PetscMPIInt size; 5521 5522 PetscFunctionBegin; 5523 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5524 5525 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5526 B->data = (void*)b; 5527 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5528 B->assembled = PETSC_FALSE; 5529 B->insertmode = NOT_SET_VALUES; 5530 b->size = size; 5531 5532 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5533 5534 /* build cache for off array entries formed */ 5535 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5536 5537 b->donotstash = PETSC_FALSE; 5538 b->colmap = 0; 5539 b->garray = 0; 5540 b->roworiented = PETSC_TRUE; 5541 5542 /* stuff used for matrix vector multiply */ 5543 b->lvec = NULL; 5544 b->Mvctx = NULL; 5545 5546 /* stuff for MatGetRow() */ 5547 b->rowindices = 0; 5548 b->rowvalues = 0; 5549 b->getrowactive = PETSC_FALSE; 5550 5551 /* flexible pointer used in CUSP/CUSPARSE classes */ 5552 b->spptr = NULL; 5553 5554 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5555 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5556 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5557 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5558 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5559 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5560 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5561 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5562 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5563 #if defined(PETSC_HAVE_MKL_SPARSE) 5564 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5565 #endif 5566 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5567 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5568 #if defined(PETSC_HAVE_ELEMENTAL) 5569 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5570 #endif 5571 #if defined(PETSC_HAVE_HYPRE) 5572 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5573 #endif 5574 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5575 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5577 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5578 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5579 #if defined(PETSC_HAVE_HYPRE) 5580 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5581 #endif 5582 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5583 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5584 PetscFunctionReturn(0); 5585 } 5586 5587 /*@C 5588 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5589 and "off-diagonal" part of the matrix in CSR format. 5590 5591 Collective on MPI_Comm 5592 5593 Input Parameters: 5594 + comm - MPI communicator 5595 . m - number of local rows (Cannot be PETSC_DECIDE) 5596 . n - number of local columns; this should be the same as the local size used in creating the 5597 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 5598 calculated if N is given). For square matrices n is almost always m. 5599 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5600 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5601 . i - row indices for "diagonal" portion of matrix 5602 . j - column indices 5603 . a - matrix values 5604 . oi - row indices for "off-diagonal" portion of matrix 5605 . oj - column indices 5606 - oa - matrix values 5607 5608 Output Parameter: 5609 . mat - the matrix 5610 5611 Level: advanced 5612 5613 Notes: 5614 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5615 must free the arrays once the matrix has been destroyed and not before. 5616 5617 The i and j indices are 0 based 5618 5619 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5620 5621 This sets local rows and cannot be used to set off-processor values. 5622 5623 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5624 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5625 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5626 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5627 keep track of the underlying arrays; a sketch of this pattern appears in a comment at the end of this file. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5628 communication if it is known that only local entries will be set. 5629 5630 .keywords: matrix, aij, compressed row, sparse, parallel 5631 5632 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5633 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5634 @*/ 5635 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5636 { 5637 PetscErrorCode ierr; 5638 Mat_MPIAIJ *maij; 5639 5640 PetscFunctionBegin; 5641 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5642 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5643 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5644 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5645 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5646 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5647 maij = (Mat_MPIAIJ*) (*mat)->data; 5648 5649 (*mat)->preallocated = PETSC_TRUE; 5650 5651 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5652 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5653 5654 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5655 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5656 5657 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5658 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5659 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5660 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5661 5662 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5663 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5664 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5665 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5666 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5667 PetscFunctionReturn(0); 5668 } 5669 5670 /* 5671 Special version for direct calls from Fortran 5672 */ 5673 #include <petsc/private/fortranimpl.h> 5674 5675 /* Change these macros so they can be used in a void function */ 5676 #undef CHKERRQ 5677 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5678 #undef SETERRQ2 5679 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5680 #undef SETERRQ3 5681 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5682 #undef SETERRQ 5683 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5684 5685 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5686 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5687 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5688 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5689 #else 5690 #endif 5691 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5692 { 5693 Mat mat = *mmat; 5694 PetscInt m = *mm, n = *mn; 5695 InsertMode addv =
*maddv; 5696 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5697 PetscScalar value; 5698 PetscErrorCode ierr; 5699 5700 MatCheckPreallocated(mat,1); 5701 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5702 5703 #if defined(PETSC_USE_DEBUG) 5704 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5705 #endif 5706 { 5707 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5708 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5709 PetscBool roworiented = aij->roworiented; 5710 5711 /* Some Variables required in the macro */ 5712 Mat A = aij->A; 5713 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5714 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5715 MatScalar *aa = a->a; 5716 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5717 Mat B = aij->B; 5718 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5719 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5720 MatScalar *ba = b->a; 5721 5722 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5723 PetscInt nonew = a->nonew; 5724 MatScalar *ap1,*ap2; 5725 5726 PetscFunctionBegin; 5727 for (i=0; i<m; i++) { 5728 if (im[i] < 0) continue; 5729 #if defined(PETSC_USE_DEBUG) 5730 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5731 #endif 5732 if (im[i] >= rstart && im[i] < rend) { 5733 row = im[i] - rstart; 5734 lastcol1 = -1; 5735 rp1 = aj + ai[row]; 5736 ap1 = aa + ai[row]; 5737 rmax1 = aimax[row]; 5738 nrow1 = ailen[row]; 5739 low1 = 0; 5740 high1 = nrow1; 5741 lastcol2 = -1; 5742 rp2 = bj + bi[row]; 5743 ap2 = ba + bi[row]; 5744 rmax2 = bimax[row]; 5745 nrow2 = bilen[row]; 5746 low2 = 0; 5747 high2 = nrow2; 5748 5749 for (j=0; j<n; j++) { 5750 if (roworiented) value = v[i*n+j]; 5751 else value = v[i+j*m]; 5752 if (in[j] >= cstart && in[j] < cend) { 5753 col = in[j] - cstart; 5754 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5755 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5756 } else if (in[j] < 0) continue; 5757 #if defined(PETSC_USE_DEBUG) 5758 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5759 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5760 #endif 5761 else { 5762 if (mat->was_assembled) { 5763 if (!aij->colmap) { 5764 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5765 } 5766 #if defined(PETSC_USE_CTABLE) 5767 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5768 col--; 5769 #else 5770 col = aij->colmap[in[j]] - 1; 5771 #endif 5772 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5773 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5774 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5775 col = in[j]; 5776 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5777 B = aij->B; 5778 b = (Mat_SeqAIJ*)B->data; 5779 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5780 rp2 = bj + bi[row]; 5781 ap2 = ba + bi[row]; 5782 rmax2 = bimax[row]; 5783 nrow2 = bilen[row]; 5784 low2 = 0; 5785 high2 = nrow2; 5786 bm = aij->B->rmap->n; 5787 ba = b->a; 5788 } 5789 } else col = in[j]; 5790 
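/* the column lies outside the diagonal block, so the entry goes into the off-diagonal block B */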
MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5791 } 5792 } 5793 } else if (!aij->donotstash) { 5794 if (roworiented) { 5795 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5796 } else { 5797 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5798 } 5799 } 5800 } 5801 } 5802 PetscFunctionReturnVoid(); 5803 } 5804 5805
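/*
   An illustrative usage sketch (added for documentation purposes; the global size 100
   and the tridiagonal stencil are made-up values, not anything defined in this file).
   It shows the assembly pattern recommended in the notes of MatCreateMPIAIJWithSplitArrays()
   above: create with MatCreateAIJ(), insert rows with MatSetValues(), then assemble.

     Mat            A;
     PetscInt       i,rstart,rend,ncols,cols[3];
     PetscScalar    vals[3];
     PetscErrorCode ierr;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,3,NULL,2,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ncols = 0;
       if (i > 0)  {cols[ncols] = i-1; vals[ncols] = -1.0; ncols++;}
       cols[ncols] = i; vals[ncols] = 2.0; ncols++;
       if (i < 99) {cols[ncols] = i+1; vals[ncols] = -1.0; ncols++;}
       ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/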