#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
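  /*
     A minimal creation sketch for the MATAIJ type documented at the top of this file
     (illustrative only; the global size 100 and the per-row nonzero estimates 3 and 2 are
     assumptions, not values taken from this source). Each process stores its rows in two
     sequential blocks, aij->A for the columns it owns and aij->B for the rest, which is why
     the man page recommends calling both preallocation routines:

         Mat            A;
         PetscInt       i,rstart,rend,ncols,cols[3];
         PetscScalar    vals[3];
         PetscErrorCode ierr;

         ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
         ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
         ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
         ierr = MatSeqAIJSetPreallocation(A,3,NULL);CHKERRQ(ierr);          (used when the communicator has one process)
         ierr = MatMPIAIJSetPreallocation(A,3,NULL,2,NULL);CHKERRQ(ierr);   (used otherwise)
         ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
         for (i=rstart; i<rend; i++) {                                      (tridiagonal stencil)
           ncols = 0;
           if (i > 0)  {cols[ncols] = i-1; vals[ncols] = -1.0; ncols++;}
           cols[ncols] = i; vals[ncols] = 2.0; ncols++;
           if (i < 99) {cols[ncols] = i+1; vals[ncols] = -1.0; ncols++;}
           ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
         }
         ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
         ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  */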
114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 
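      /* Each pass through this loop consumes the row-major value stream gmataa: it copies the
         off-diagonal values that trail row i-1 together with those that lead row i into the
         off-diagonal block's array ao, then copies row i's diagonal-block values into ad;
         ld[] holds, for every local row, the number of off-diagonal entries that lie to the
         left of the diagonal block. */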
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  process holds an order-N integer array), but access is fast.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
  if (col <= lastcol1) low1 = 0; \
  else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else low1 = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  A->nonzerostate++; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
  if (col <= lastcol2) low2 = 0; \
  else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else low2 = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else ap2[_i]
= value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 
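        /* Route this entry by column: global columns in the local ownership range [cstart,cend)
           are stored in the diagonal block aij->A under a shifted local index, while all other
           columns are stored in the off-diagonal block aij->B; once the matrix has been assembled
           before, the global column is translated to B's local numbering through aij->colmap. */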
575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 636 { 637 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 638 PetscErrorCode ierr; 639 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 640 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 641 642 PetscFunctionBegin; 643 for (i=0; i<m; i++) { 644 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 645 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 646 if (idxm[i] >= rstart && idxm[i] < rend) { 647 row = idxm[i] - rstart; 648 for (j=0; j<n; j++) { 649 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 650 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 651 if (idxn[j] >= cstart && idxn[j] < cend) { 652 col = idxn[j] - cstart; 653 ierr = 
MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 731 /* 732 if nonzero structure of submatrix B cannot change then we know that 733 no processor disassembled thus we can skip this stuff 734 */ 735 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 736 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 737 if (mat->was_assembled && !other_disassembled) { 738 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 739 } 740 } 741 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 742 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 743 } 744 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 745 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 746 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 747 748 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 749 750 aij->rowvalues = 0; 751 752 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 753 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 754 755 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 756 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 757 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 758 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 764 { 765 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 766 PetscErrorCode ierr; 767 768 PetscFunctionBegin; 769 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 770 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 771 PetscFunctionReturn(0); 772 } 773 774 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 775 { 776 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 777 PetscInt *lrows; 778 PetscInt r, len; 779 PetscBool cong; 780 PetscErrorCode ierr; 781 782 PetscFunctionBegin; 783 /* get locally owned rows */ 784 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 785 /* fix right hand side if needed */ 786 if (x && b) { 787 const PetscScalar *xx; 788 PetscScalar *bb; 789 790 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 791 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 792 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 793 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 794 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 795 } 796 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 797 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 798 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 799 if ((diag != 0.0) && cong) { 800 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 801 } else if (diag != 0.0) { 802 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 803 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 804 for (r = 0; r < len; ++r) { 805 const PetscInt row = lrows[r] + A->rmap->rstart; 806 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 807 } 808 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 809 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 810 } else { 811 ierr = MatZeroRows(mat->A, len, lrows, 0.0, 
NULL, NULL);CHKERRQ(ierr); 812 } 813 ierr = PetscFree(lrows);CHKERRQ(ierr); 814 815 /* only change matrix nonzero state if pattern was allowed to be changed */ 816 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 817 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 818 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 819 } 820 PetscFunctionReturn(0); 821 } 822 823 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 824 { 825 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 826 PetscErrorCode ierr; 827 PetscMPIInt n = A->rmap->n; 828 PetscInt i,j,r,m,p = 0,len = 0; 829 PetscInt *lrows,*owners = A->rmap->range; 830 PetscSFNode *rrows; 831 PetscSF sf; 832 const PetscScalar *xx; 833 PetscScalar *bb,*mask; 834 Vec xmask,lmask; 835 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 836 const PetscInt *aj, *ii,*ridx; 837 PetscScalar *aa; 838 839 PetscFunctionBegin; 840 /* Create SF where leaves are input rows and roots are owned rows */ 841 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 842 for (r = 0; r < n; ++r) lrows[r] = -1; 843 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 844 for (r = 0; r < N; ++r) { 845 const PetscInt idx = rows[r]; 846 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 847 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 848 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 849 } 850 rrows[r].rank = p; 851 rrows[r].index = rows[r] - owners[p]; 852 } 853 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 854 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 855 /* Collect flags for rows to be zeroed */ 856 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 857 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 858 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 859 /* Compress and put in row numbers */ 860 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 861 /* zero diagonal part of matrix */ 862 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 863 /* handle off diagonal part of matrix */ 864 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 865 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 866 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 867 for (i=0; i<len; i++) bb[lrows[i]] = 1; 868 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 869 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 870 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 872 if (x) { 873 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 875 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 876 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 877 } 878 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 879 /* remove zeroed rows of off diagonal matrix */ 880 ii = aij->i; 881 for (i=0; i<len; i++) { 882 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 883 } 884 /* loop over all elements of off process part of matrix zeroing removed columns*/ 885 if 
(aij->compressedrow.use) { 886 m = aij->compressedrow.nrows; 887 ii = aij->compressedrow.i; 888 ridx = aij->compressedrow.rindex; 889 for (i=0; i<m; i++) { 890 n = ii[i+1] - ii[i]; 891 aj = aij->j + ii[i]; 892 aa = aij->a + ii[i]; 893 894 for (j=0; j<n; j++) { 895 if (PetscAbsScalar(mask[*aj])) { 896 if (b) bb[*ridx] -= *aa*xx[*aj]; 897 *aa = 0.0; 898 } 899 aa++; 900 aj++; 901 } 902 ridx++; 903 } 904 } else { /* do not use compressed row format */ 905 m = l->B->rmap->n; 906 for (i=0; i<m; i++) { 907 n = ii[i+1] - ii[i]; 908 aj = aij->j + ii[i]; 909 aa = aij->a + ii[i]; 910 for (j=0; j<n; j++) { 911 if (PetscAbsScalar(mask[*aj])) { 912 if (b) bb[i] -= *aa*xx[*aj]; 913 *aa = 0.0; 914 } 915 aa++; 916 aj++; 917 } 918 } 919 } 920 if (x) { 921 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 922 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 923 } 924 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 925 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 926 ierr = PetscFree(lrows);CHKERRQ(ierr); 927 928 /* only change matrix nonzero state if pattern was allowed to be changed */ 929 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 930 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 931 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 932 } 933 PetscFunctionReturn(0); 934 } 935 936 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 937 { 938 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 939 PetscErrorCode ierr; 940 PetscInt nt; 941 VecScatter Mvctx = a->Mvctx; 942 943 PetscFunctionBegin; 944 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 945 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 946 947 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 949 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 951 PetscFunctionReturn(0); 952 } 953 954 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 955 { 956 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 964 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 965 { 966 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 967 PetscErrorCode ierr; 968 VecScatter Mvctx = a->Mvctx; 969 970 PetscFunctionBegin; 971 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 972 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 973 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 974 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 979 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 980 { 981 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 982 PetscErrorCode ierr; 983 PetscBool merged; 984 985 PetscFunctionBegin; 986 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 987 /* do nondiagonal part */ 988 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 989 if (!merged) { 990 /* send it on its way */ 991 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 992 /* do local part */ 993 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 
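    /* This ordering overlaps communication with computation: the reverse scatter of the
       off-diagonal contributions in a->lvec was started above, the purely local product with
       the diagonal block is formed while those messages are in flight, and the remote
       contributions are only added into yy by VecScatterEnd() below. MatMult_MPIAIJ() uses the
       same begin/compute/end pattern in the forward direction. */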
994 /* receive remote parts: note this assumes the values are not actually */ 995 /* added in yy until the next line, */ 996 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 997 } else { 998 /* do local part */ 999 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1000 /* send it on its way */ 1001 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1002 /* values actually were received in the Begin() but we need to call this nop */ 1003 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1004 } 1005 PetscFunctionReturn(0); 1006 } 1007 1008 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1009 { 1010 MPI_Comm comm; 1011 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1012 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1013 IS Me,Notme; 1014 PetscErrorCode ierr; 1015 PetscInt M,N,first,last,*notme,i; 1016 PetscMPIInt size; 1017 1018 PetscFunctionBegin; 1019 /* Easy test: symmetric diagonal block */ 1020 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1021 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1022 if (!*f) PetscFunctionReturn(0); 1023 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1024 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1025 if (size == 1) PetscFunctionReturn(0); 1026 1027 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1028 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1029 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1030 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1031 for (i=0; i<first; i++) notme[i] = i; 1032 for (i=last; i<M; i++) notme[i-last+first] = i; 1033 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1034 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1035 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1036 Aoff = Aoffs[0]; 1037 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1038 Boff = Boffs[0]; 1039 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1040 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1041 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1042 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1043 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1044 ierr = PetscFree(notme);CHKERRQ(ierr); 1045 PetscFunctionReturn(0); 1046 } 1047 1048 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1049 { 1050 PetscErrorCode ierr; 1051 1052 PetscFunctionBegin; 1053 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1054 PetscFunctionReturn(0); 1055 } 1056 1057 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1058 { 1059 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1060 PetscErrorCode ierr; 1061 1062 PetscFunctionBegin; 1063 /* do nondiagonal part */ 1064 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1065 /* send it on its way */ 1066 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1067 /* do local part */ 1068 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1069 /* receive remote parts */ 1070 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1071 PetscFunctionReturn(0); 1072 } 1073 1074 /* 1075 This only works correctly for square matrices where the subblock A->A is the 1076 diagonal block 1077 */ 1078 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat 
A,Vec v) 1079 { 1080 PetscErrorCode ierr; 1081 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1082 1083 PetscFunctionBegin; 1084 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1085 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1086 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1091 { 1092 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 1095 PetscFunctionBegin; 1096 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1097 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1102 { 1103 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1104 PetscErrorCode ierr; 1105 1106 PetscFunctionBegin; 1107 #if defined(PETSC_USE_LOG) 1108 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1109 #endif 1110 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1111 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1112 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1113 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1114 #if defined(PETSC_USE_CTABLE) 1115 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1116 #else 1117 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1118 #endif 1119 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1120 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1121 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1122 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1123 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1124 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1125 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1126 1127 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1133 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1134 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1135 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1136 #if defined(PETSC_HAVE_ELEMENTAL) 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1138 #endif 1139 #if defined(PETSC_HAVE_HYPRE) 1140 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1142 #endif 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1149 { 1150 Mat_MPIAIJ *aij 
= (Mat_MPIAIJ*)mat->data; 1151 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1152 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1153 PetscErrorCode ierr; 1154 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1155 int fd; 1156 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1157 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1158 PetscScalar *column_values; 1159 PetscInt message_count,flowcontrolcount; 1160 FILE *file; 1161 1162 PetscFunctionBegin; 1163 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1164 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1165 nz = A->nz + B->nz; 1166 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1167 if (!rank) { 1168 header[0] = MAT_FILE_CLASSID; 1169 header[1] = mat->rmap->N; 1170 header[2] = mat->cmap->N; 1171 1172 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1173 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1174 /* get largest number of rows any processor has */ 1175 rlen = mat->rmap->n; 1176 range = mat->rmap->range; 1177 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1178 } else { 1179 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1180 rlen = mat->rmap->n; 1181 } 1182 1183 /* load up the local row counts */ 1184 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1185 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1186 1187 /* store the row lengths to the file */ 1188 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1189 if (!rank) { 1190 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1191 for (i=1; i<size; i++) { 1192 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1193 rlen = range[i+1] - range[i]; 1194 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1195 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1196 } 1197 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1198 } else { 1199 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1200 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1201 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1202 } 1203 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1204 1205 /* load up the local column indices */ 1206 nzmax = nz; /* th processor needs space a largest processor needs */ 1207 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1208 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1209 cnt = 0; 1210 for (i=0; i<mat->rmap->n; i++) { 1211 for (j=B->i[i]; j<B->i[i+1]; j++) { 1212 if ((col = garray[B->j[j]]) > cstart) break; 1213 column_indices[cnt++] = col; 1214 } 1215 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1216 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1217 } 1218 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1219 1220 /* store the column indices to the file */ 1221 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1222 if (!rank) { 1223 MPI_Status status; 1224 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1225 for (i=1; i<size; i++) { 1226 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1227 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1228 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1229 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1230 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1231 } 1232 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1233 } else { 1234 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1235 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1236 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1237 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1238 } 1239 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1240 1241 /* load up the local column values */ 1242 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1243 cnt = 0; 1244 for (i=0; i<mat->rmap->n; i++) { 1245 for (j=B->i[i]; j<B->i[i+1]; j++) { 1246 if (garray[B->j[j]] > cstart) break; 1247 column_values[cnt++] = B->a[j]; 1248 } 1249 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1250 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1251 } 1252 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1253 1254 /* store the column values to the file */ 1255 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1256 if (!rank) { 1257 MPI_Status status; 1258 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1259 for (i=1; i<size; i++) { 1260 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1261 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1262 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1263 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1264 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1265 } 1266 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1267 } else { 1268 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1269 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1270 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1271 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1272 } 1273 ierr = PetscFree(column_values);CHKERRQ(ierr); 1274 1275 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1276 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #include <petscdraw.h> 1281 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1282 { 1283 
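  /* A brief viewing sketch (illustrative, not taken from this file): callers normally reach this
     routine through MatView(), either directly or via the -mat_view options, e.g.

         ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
         ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
         ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
         ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

     or, from the command line, -mat_view, -mat_view ::ascii_info, or -mat_view draw. */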
Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1284 PetscErrorCode ierr; 1285 PetscMPIInt rank = aij->rank,size = aij->size; 1286 PetscBool isdraw,iascii,isbinary; 1287 PetscViewer sviewer; 1288 PetscViewerFormat format; 1289 1290 PetscFunctionBegin; 1291 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1292 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1293 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1294 if (iascii) { 1295 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1296 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1297 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1298 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1299 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1300 for (i=0; i<(PetscInt)size; i++) { 1301 nmax = PetscMax(nmax,nz[i]); 1302 nmin = PetscMin(nmin,nz[i]); 1303 navg += nz[i]; 1304 } 1305 ierr = PetscFree(nz);CHKERRQ(ierr); 1306 navg = navg/size; 1307 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1311 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1312 MatInfo info; 1313 PetscBool inodes; 1314 1315 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1316 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1317 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1318 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1319 if (!inodes) { 1320 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1321 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1322 } else { 1323 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1324 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1325 } 1326 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1327 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1328 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1329 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1330 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1331 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1332 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1333 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1336 PetscInt inodecount,inodelimit,*inodes; 1337 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1338 if (inodes) { 1339 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1340 } else { 1341 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1342 } 1343 PetscFunctionReturn(0); 1344 } else if 
(format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1345 PetscFunctionReturn(0); 1346 } 1347 } else if (isbinary) { 1348 if (size == 1) { 1349 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1350 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1351 } else { 1352 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1353 } 1354 PetscFunctionReturn(0); 1355 } else if (isdraw) { 1356 PetscDraw draw; 1357 PetscBool isnull; 1358 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1359 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1360 if (isnull) PetscFunctionReturn(0); 1361 } 1362 1363 { 1364 /* assemble the entire matrix onto first processor. */ 1365 Mat A; 1366 Mat_SeqAIJ *Aloc; 1367 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1368 MatScalar *a; 1369 1370 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1371 if (!rank) { 1372 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1373 } else { 1374 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1375 } 1376 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1377 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1378 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1379 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1380 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1381 1382 /* copy over the A part */ 1383 Aloc = (Mat_SeqAIJ*)aij->A->data; 1384 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1385 row = mat->rmap->rstart; 1386 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1387 for (i=0; i<m; i++) { 1388 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1389 row++; 1390 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1391 } 1392 aj = Aloc->j; 1393 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1394 1395 /* copy over the B part */ 1396 Aloc = (Mat_SeqAIJ*)aij->B->data; 1397 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1398 row = mat->rmap->rstart; 1399 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1400 ct = cols; 1401 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1402 for (i=0; i<m; i++) { 1403 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1404 row++; 1405 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1406 } 1407 ierr = PetscFree(ct);CHKERRQ(ierr); 1408 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1409 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1410 /* 1411 Everyone has to call to draw the matrix since the graphics waits are 1412 synchronized across all processors that share the PetscDraw object 1413 */ 1414 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1415 if (!rank) { 1416 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1417 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1418 } 1419 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1420 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1421 ierr = MatDestroy(&A);CHKERRQ(ierr); 1422 } 1423 PetscFunctionReturn(0); 1424 } 1425 1426 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1427 { 1428 PetscErrorCode ierr; 1429 PetscBool iascii,isdraw,issocket,isbinary; 1430 1431 PetscFunctionBegin; 1432 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1433 ierr = 
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
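/*
   A minimal usage sketch (illustration only, not part of this file): the local
   sweeps implemented above are what PCSOR applies for parallel AIJ matrices,
   e.g. (ksp is assumed to be a KSP configured elsewhere):

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   or from the command line with -pc_type sor -pc_sor_local_symmetric.
*/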
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
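/*
   A minimal usage sketch (illustration only, not part of this file): rowp and
   colp are parallel index sets giving the new global position of each local
   row and column; the identity permutation, for example, is

     IS  rowp,colp;
     Mat Aperm;
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),A->rmap->n,A->rmap->rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),A->cmap->n,A->cmap->rstart,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
*/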
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}
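/*
   A minimal usage sketch (illustration only, not part of this file): the ghost
   array maps local columns of the off-diagonal block to global column indices:

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr); /* A is assumed to be MATMPIAIJ */
*/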
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A    = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
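/*
   A minimal usage sketch (illustration only, not part of this file):

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"global nonzeros used: %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/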
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
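/*
   A minimal usage sketch (illustration only, not part of this file):

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); /* as coded above, also marks A symmetric */
*/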
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
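/*
   A minimal usage sketch (illustration only, not part of this file): only
   locally owned rows may be queried, and each MatGetRow() must be paired with
   MatRestoreRow():

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* ... use cols[] and vals[] ... */
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/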
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
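/*
   A minimal usage sketch (illustration only, not part of this file): the norms
   supported above are NORM_1, NORM_FROBENIUS, and NORM_INFINITY:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/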
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data,*Bloc = (Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
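/*
   A minimal usage sketch (illustration only, not part of this file):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     /* after A's numerical values change (same pattern), reuse At */
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
*/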
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}
/* This is the same as MatAXPYGetPreallocation_SeqAIJ(), except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* v has one entry per local row, so loop over the row map */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  /* the work vectors are purely local, so create them on PETSC_COMM_SELF */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
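/*
   A minimal usage sketch (illustration only, not part of this file): switch an
   assembled MATMPIAIJ to the scalable overlap algorithm before a domain
   decomposition preconditioner (e.g. PCASM) computes its overlapping subdomains:

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   The same switch is available as -mat_increase_overlap_scalable (see below).
*/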
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij  = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d  += rstart;
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                               /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                               /*109*/ 0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                               /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                               /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                               /*129*/ 0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                               /*134*/ 0,
                                       0,
                                       MatRARt_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                               /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
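/*
   A minimal usage sketch (illustration only, not part of this file): d_nz and
   o_nz give uniform per-row estimates for the diagonal and off-diagonal blocks,
   while d_nnz/o_nnz give exact per-row counts; M and N are placeholder global
   sizes:

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/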
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  if (oldmat->Mvctx_mpi1) {
    ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
  }

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2835 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2836 if (bs < 0) bs = 1; 2837 2838 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2839 M = header[1]; N = header[2]; 2840 2841 /* If global sizes are set, check if they are consistent with that given in the file */ 2842 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2843 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2844 2845 /* determine ownership of all (block) rows */ 2846 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2847 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2848 else m = newMat->rmap->n; /* Set by user */ 2849 2850 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2851 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2852 2853 /* First process needs enough room for process with most rows */ 2854 if (!rank) { 2855 mmax = rowners[1]; 2856 for (i=2; i<=size; i++) { 2857 mmax = PetscMax(mmax, rowners[i]); 2858 } 2859 } else mmax = -1; /* unused, but compilers complain */ 2860 2861 rowners[0] = 0; 2862 for (i=2; i<=size; i++) { 2863 rowners[i] += rowners[i-1]; 2864 } 2865 rstart = rowners[rank]; 2866 rend = rowners[rank+1]; 2867 2868 /* distribute row lengths to all processors */ 2869 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2870 if (!rank) { 2871 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2872 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2873 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2874 for (j=0; j<m; j++) { 2875 procsnz[0] += ourlens[j]; 2876 } 2877 for (i=1; i<size; i++) { 2878 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2879 /* calculate the number of nonzeros on each processor */ 2880 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2881 procsnz[i] += rowlengths[j]; 2882 } 2883 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2884 } 2885 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2886 } else { 2887 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2888 } 2889 2890 if (!rank) { 2891 /* determine max buffer needed and allocate it */ 2892 maxnz = 0; 2893 for (i=0; i<size; i++) { 2894 maxnz = PetscMax(maxnz,procsnz[i]); 2895 } 2896 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2897 2898 /* read in my part of the matrix column indices */ 2899 nz = procsnz[0]; 2900 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2901 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2902 2903 /* read in every one elses and ship off */ 2904 for (i=1; i<size; i++) { 2905 nz = procsnz[i]; 2906 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2907 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2908 } 2909 ierr = PetscFree(cols);CHKERRQ(ierr); 2910 } else { 2911 /* determine buffer space needed for message */ 2912 nz = 0; 2913 for (i=0; i<m; i++) { 2914 nz += ourlens[i]; 2915 } 2916 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2917 2918 /* receive message of column indices*/ 2919 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2920 } 2921 2922 /* 
  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining the number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
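/*
   A minimal usage sketch (illustration only, not part of this file): load a
   matrix previously written with MatView() on a binary viewer ("matrix.dat"
   is a placeholder file name):

     Mat         A;
     PetscViewer fd;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,fd);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);
*/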
*/ 3010 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3011 { 3012 PetscErrorCode ierr; 3013 IS iscol_local; 3014 PetscBool isstride; 3015 PetscMPIInt lisstride=0,gisstride; 3016 3017 PetscFunctionBegin; 3018 /* check if we are grabbing all columns*/ 3019 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3020 3021 if (isstride) { 3022 PetscInt start,len,mstart,mlen; 3023 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3024 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3025 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3026 if (mstart == start && mlen-mstart == len) lisstride = 1; 3027 } 3028 3029 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3030 if (gisstride) { 3031 PetscInt N; 3032 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3033 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3034 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3035 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3036 } else { 3037 PetscInt cbs; 3038 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3039 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3040 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3041 } 3042 3043 *isseq = iscol_local; 3044 PetscFunctionReturn(0); 3045 } 3046 3047 /* 3048 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3049 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3050 3051 Input Parameters: 3052 mat - matrix 3053 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3054 i.e., mat->rstart <= isrow[i] < mat->rend 3055 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3056 i.e., mat->cstart <= iscol[i] < mat->cend 3057 Output Parameter: 3058 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3059 iscol_o - sequential column index set for retrieving mat->B 3060 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3061 */ 3062 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3063 { 3064 PetscErrorCode ierr; 3065 Vec x,cmap; 3066 const PetscInt *is_idx; 3067 PetscScalar *xarray,*cmaparray; 3068 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3069 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3070 Mat B=a->B; 3071 Vec lvec=a->lvec,lcmap; 3072 PetscInt i,cstart,cend,Bn=B->cmap->N; 3073 MPI_Comm comm; 3074 VecScatter Mvctx=a->Mvctx; 3075 3076 PetscFunctionBegin; 3077 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3078 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3079 3080 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3081 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3082 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3083 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3084 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3085 3086 /* Get start indices */ 3087 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3088 isstart -= ncols; 3089 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3090 3091 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3092 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3093 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3094 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3095 for (i=0; i<ncols; i++) { 3096 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3097 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3098 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3099 } 3100 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3101 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3102 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3103 3104 /* Get iscol_d */ 3105 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3106 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3107 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3108 3109 /* Get isrow_d */ 3110 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3111 rstart = mat->rmap->rstart; 3112 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3113 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3114 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3115 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3116 3117 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3118 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3119 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3120 3121 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3122 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3123 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3124 3125 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3126 3127 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3128 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3129 3130 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3131 /* off-process column indices */ 3132 count = 0; 3133 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3134 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3135 3136 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3137 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3138 for (i=0; i<Bn; i++) { 3139 if (PetscRealPart(xarray[i]) > -1.0) { 3140 idx[count] = i; /* local column index in off-diagonal part B */ 3141 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3142 count++; 3143 } 3144 } 3145 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3146 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3147 3148 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3149 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3150 3151 ierr = PetscFree(idx);CHKERRQ(ierr); 3152 *garray = cmap1; 3153 3154 ierr = VecDestroy(&x);CHKERRQ(ierr); 3155 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3156 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3157 PetscFunctionReturn(0); 3158 } 3159 3160 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3161 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3162 { 3163 PetscErrorCode ierr; 3164 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3165 Mat M = NULL; 3166 MPI_Comm comm; 3167 IS iscol_d,isrow_d,iscol_o; 3168 Mat Asub = NULL,Bsub = NULL; 3169 PetscInt n; 3170 3171 PetscFunctionBegin; 3172 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3173 3174 if (call == MAT_REUSE_MATRIX) { 3175 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3176 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3177 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3178 3179 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3180 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3181 3182 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3183 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3184 3185 /* Update diagonal and off-diagonal portions of submat */ 3186 asub = (Mat_MPIAIJ*)(*submat)->data; 3187 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3188 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3189 if (n) { 3190 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3191 } 3192 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3193 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3194 3195 } else { /* call == MAT_INITIAL_MATRIX) */ 3196 const PetscInt *garray; 3197 PetscInt BsubN; 3198 3199 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3200 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3201 3202 /* Create local submatrices Asub and Bsub */ 3203 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3204 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3205 3206 /* Create submatrix M */ 3207 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3208 3209 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3210 asub = (Mat_MPIAIJ*)M->data; 3211 3212 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3213 n = asub->B->cmap->N; 3214 if (BsubN > n) { 3215 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3216 const PetscInt *idx; 3217 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3218 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3219 3220 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3221 j = 0; 3222 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3223 for (i=0; i<n; i++) { 3224 if (j >= BsubN) break; 3225 while (subgarray[i] > garray[j]) j++; 3226 3227 if (subgarray[i] == garray[j]) { 3228 idx_new[i] = idx[j++]; 3229 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3230 } 3231 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3232 3233 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3234 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3235 3236 } else if (BsubN < n) { 3237 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3238 } 3239 3240 ierr = PetscFree(garray);CHKERRQ(ierr); 3241 *submat = M; 3242 3243 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3244 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3245 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3246 3247 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3248 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3249 3250 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3251 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3252 } 3253 PetscFunctionReturn(0); 3254 } 3255 3256 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3257 { 3258 PetscErrorCode ierr; 3259 IS iscol_local=NULL,isrow_d; 3260 PetscInt csize; 3261 PetscInt n,i,j,start,end; 3262 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3263 MPI_Comm comm; 3264 3265 PetscFunctionBegin; 3266 /* If isrow has same processor distribution as mat, 3267 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3268 if (call == MAT_REUSE_MATRIX) { 3269 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3270 if (isrow_d) { 3271 sameRowDist = PETSC_TRUE; 3272 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3273 } else { 3274 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3275 if (iscol_local) { 3276 sameRowDist = PETSC_TRUE; 3277 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3278 } 3279 } 3280 } else { 3281 /* Check if isrow has same processor distribution as mat */ 3282 sameDist[0] 
= PETSC_FALSE; 3283 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3284 if (!n) { 3285 sameDist[0] = PETSC_TRUE; 3286 } else { 3287 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3288 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3289 if (i >= start && j < end) { 3290 sameDist[0] = PETSC_TRUE; 3291 } 3292 } 3293 3294 /* Check if iscol has same processor distribution as mat */ 3295 sameDist[1] = PETSC_FALSE; 3296 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3297 if (!n) { 3298 sameDist[1] = PETSC_TRUE; 3299 } else { 3300 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3301 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3302 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3303 } 3304 3305 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3306 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3307 sameRowDist = tsameDist[0]; 3308 } 3309 3310 if (sameRowDist) { 3311 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3312 /* isrow and iscol have same processor distribution as mat */ 3313 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3314 PetscFunctionReturn(0); 3315 } else { /* sameRowDist */ 3316 /* isrow has same processor distribution as mat */ 3317 if (call == MAT_INITIAL_MATRIX) { 3318 PetscBool sorted; 3319 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3320 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3321 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3322 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3323 3324 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3325 if (sorted) { 3326 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3327 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3328 PetscFunctionReturn(0); 3329 } 3330 } else { /* call == MAT_REUSE_MATRIX */ 3331 IS iscol_sub; 3332 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3333 if (iscol_sub) { 3334 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3335 PetscFunctionReturn(0); 3336 } 3337 } 3338 } 3339 } 3340 3341 /* General case: iscol -> iscol_local which has global size of iscol */ 3342 if (call == MAT_REUSE_MATRIX) { 3343 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3344 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3345 } else { 3346 if (!iscol_local) { 3347 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3348 } 3349 } 3350 3351 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3352 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3353 3354 if (call == MAT_INITIAL_MATRIX) { 3355 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3356 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3357 } 3358 PetscFunctionReturn(0); 3359 } 3360 3361 /*@C 3362 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3363 and "off-diagonal" part of the matrix in CSR format. 3364 3365 Collective on MPI_Comm 3366 3367 Input Parameters: 3368 + comm - MPI communicator 3369 . 
A - "diagonal" portion of matrix 3370 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3371 - garray - global index of B columns 3372 3373 Output Parameter: 3374 . mat - the matrix, with input A as its local diagonal matrix 3375 Level: advanced 3376 3377 Notes: 3378 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3379 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3380 3381 .seealso: MatCreateMPIAIJWithSplitArrays() 3382 @*/ 3383 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3384 { 3385 PetscErrorCode ierr; 3386 Mat_MPIAIJ *maij; 3387 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3388 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3389 PetscScalar *oa=b->a; 3390 Mat Bnew; 3391 PetscInt m,n,N; 3392 3393 PetscFunctionBegin; 3394 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3395 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3396 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3397 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3398 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3399 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3400 3401 /* Get global columns of mat */ 3402 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3403 3404 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3405 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3406 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3407 maij = (Mat_MPIAIJ*)(*mat)->data; 3408 3409 (*mat)->preallocated = PETSC_TRUE; 3410 3411 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3412 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3413 3414 /* Set A as diagonal portion of *mat */ 3415 maij->A = A; 3416 3417 nz = oi[m]; 3418 for (i=0; i<nz; i++) { 3419 col = oj[i]; 3420 oj[i] = garray[col]; 3421 } 3422 3423 /* Set Bnew as off-diagonal portion of *mat */ 3424 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3425 bnew = (Mat_SeqAIJ*)Bnew->data; 3426 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3427 maij->B = Bnew; 3428 3429 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3430 3431 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3432 b->free_a = PETSC_FALSE; 3433 b->free_ij = PETSC_FALSE; 3434 ierr = MatDestroy(&B);CHKERRQ(ierr); 3435 3436 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3437 bnew->free_a = PETSC_TRUE; 3438 bnew->free_ij = PETSC_TRUE; 3439 3440 /* condense columns of maij->B */ 3441 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3442 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3443 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3444 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3445 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3446 PetscFunctionReturn(0); 3447 } 3448 3449 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3450 
3451 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3452 { 3453 PetscErrorCode ierr; 3454 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3455 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3456 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3457 Mat M,Msub,B=a->B; 3458 MatScalar *aa; 3459 Mat_SeqAIJ *aij; 3460 PetscInt *garray = a->garray,*colsub,Ncols; 3461 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3462 IS iscol_sub,iscmap; 3463 const PetscInt *is_idx,*cmap; 3464 PetscBool allcolumns=PETSC_FALSE; 3465 MPI_Comm comm; 3466 3467 PetscFunctionBegin; 3468 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3469 3470 if (call == MAT_REUSE_MATRIX) { 3471 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3472 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3473 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3474 3475 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3476 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3477 3478 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3479 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3480 3481 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3482 3483 } else { /* call == MAT_INITIAL_MATRIX) */ 3484 PetscBool flg; 3485 3486 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3487 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3488 3489 /* (1) iscol -> nonscalable iscol_local */ 3490 /* Check for special case: each processor gets entire matrix columns */ 3491 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3492 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3493 if (allcolumns) { 3494 iscol_sub = iscol_local; 3495 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3496 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3497 3498 } else { 3499 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3500 PetscInt *idx,*cmap1,k; 3501 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3502 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3503 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3504 count = 0; 3505 k = 0; 3506 for (i=0; i<Ncols; i++) { 3507 j = is_idx[i]; 3508 if (j >= cstart && j < cend) { 3509 /* diagonal part of mat */ 3510 idx[count] = j; 3511 cmap1[count++] = i; /* column index in submat */ 3512 } else if (Bn) { 3513 /* off-diagonal part of mat */ 3514 if (j == garray[k]) { 3515 idx[count] = j; 3516 cmap1[count++] = i; /* column index in submat */ 3517 } else if (j > garray[k]) { 3518 while (j > garray[k] && k < Bn-1) k++; 3519 if (j == garray[k]) { 3520 idx[count] = j; 3521 cmap1[count++] = i; /* column index in submat */ 3522 } 3523 } 3524 } 3525 } 3526 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3527 3528 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3529 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3530 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3531 3532 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3533 } 3534 3535 /* (3) Create sequential Msub */ 3536 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3537 } 3538 3539 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3540 aij = (Mat_SeqAIJ*)(Msub)->data; 3541 ii = aij->i; 3542 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3543 3544 /* 3545 m - number of local rows 3546 Ncols - number of columns (same on all processors) 3547 rstart - first row in new global matrix generated 3548 */ 3549 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3550 3551 if (call == MAT_INITIAL_MATRIX) { 3552 /* (4) Create parallel newmat */ 3553 PetscMPIInt rank,size; 3554 PetscInt csize; 3555 3556 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3557 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3558 3559 /* 3560 Determine the number of non-zeros in the diagonal and off-diagonal 3561 portions of the matrix in order to do correct preallocation 3562 */ 3563 3564 /* first get start and end of "diagonal" columns */ 3565 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3566 if (csize == PETSC_DECIDE) { 3567 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3568 if (mglobal == Ncols) { /* square matrix */ 3569 nlocal = m; 3570 } else { 3571 nlocal = Ncols/size + ((Ncols % size) > rank); 3572 } 3573 } else { 3574 nlocal = csize; 3575 } 3576 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3577 rstart = rend - nlocal; 3578 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3579 3580 /* next, compute all the lengths */ 3581 jj = aij->j; 3582 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3583 olens = dlens + m; 3584 for (i=0; i<m; i++) { 3585 jend = ii[i+1] - ii[i]; 3586 olen = 0; 3587 dlen = 0; 3588 for (j=0; j<jend; j++) { 3589 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3590 else dlen++; 3591 jj++; 3592 } 3593 olens[i] = olen; 3594 dlens[i] = dlen; 3595 } 3596 3597 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3598 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3599 3600 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3601 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3602 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3603 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3604 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3605 ierr = PetscFree(dlens);CHKERRQ(ierr); 3606 3607 } else { /* call == MAT_REUSE_MATRIX */ 3608 M = *newmat; 3609 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3610 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3611 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3612 /* 3613 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3614 rather than the slower MatSetValues(). 3615 */ 3616 M->was_assembled = PETSC_TRUE; 3617 M->assembled = PETSC_FALSE; 3618 } 3619 3620 /* (5) Set values of Msub to *newmat */ 3621 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3622 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3623 3624 jj = aij->j; 3625 aa = aij->a; 3626 for (i=0; i<m; i++) { 3627 row = rstart + i; 3628 nz = ii[i+1] - ii[i]; 3629 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3630 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3631 jj += nz; aa += nz; 3632 } 3633 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3634 3635 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3636 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3637 3638 ierr = PetscFree(colsub);CHKERRQ(ierr); 3639 3640 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3641 if (call == MAT_INITIAL_MATRIX) { 3642 *newmat = M; 3643 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3644 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3645 3646 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3647 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3648 3649 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3650 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3651 3652 if (iscol_local) { 3653 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3654 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3655 } 3656 } 3657 PetscFunctionReturn(0); 3658 } 3659 3660 /* 3661 Not great since it makes two copies of the submatrix, first an SeqAIJ 3662 in local and then by concatenating the local matrices the end result. 3663 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3664 3665 Note: This requires a sequential iscol with all indices. 
3666 */ 3667 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3668 { 3669 PetscErrorCode ierr; 3670 PetscMPIInt rank,size; 3671 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3672 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3673 Mat M,Mreuse; 3674 MatScalar *aa,*vwork; 3675 MPI_Comm comm; 3676 Mat_SeqAIJ *aij; 3677 PetscBool colflag,allcolumns=PETSC_FALSE; 3678 3679 PetscFunctionBegin; 3680 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3681 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3682 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3683 3684 /* Check for special case: each processor gets entire matrix columns */ 3685 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3686 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3687 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3688 3689 if (call == MAT_REUSE_MATRIX) { 3690 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3691 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3692 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3693 } else { 3694 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3695 } 3696 3697 /* 3698 m - number of local rows 3699 n - number of columns (same on all processors) 3700 rstart - first row in new global matrix generated 3701 */ 3702 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3703 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3704 if (call == MAT_INITIAL_MATRIX) { 3705 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3706 ii = aij->i; 3707 jj = aij->j; 3708 3709 /* 3710 Determine the number of non-zeros in the diagonal and off-diagonal 3711 portions of the matrix in order to do correct preallocation 3712 */ 3713 3714 /* first get start and end of "diagonal" columns */ 3715 if (csize == PETSC_DECIDE) { 3716 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3717 if (mglobal == n) { /* square matrix */ 3718 nlocal = m; 3719 } else { 3720 nlocal = n/size + ((n % size) > rank); 3721 } 3722 } else { 3723 nlocal = csize; 3724 } 3725 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3726 rstart = rend - nlocal; 3727 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3728 3729 /* next, compute all the lengths */ 3730 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3731 olens = dlens + m; 3732 for (i=0; i<m; i++) { 3733 jend = ii[i+1] - ii[i]; 3734 olen = 0; 3735 dlen = 0; 3736 for (j=0; j<jend; j++) { 3737 if (*jj < rstart || *jj >= rend) olen++; 3738 else dlen++; 3739 jj++; 3740 } 3741 olens[i] = olen; 3742 dlens[i] = dlen; 3743 } 3744 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3745 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3746 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3747 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3748 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3749 ierr = PetscFree(dlens);CHKERRQ(ierr); 3750 } else { 3751 PetscInt ml,nl; 3752 3753 M = *newmat; 3754 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3755 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3756 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3757 /* 3758 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3759 rather than the slower MatSetValues(). 3760 */ 3761 M->was_assembled = PETSC_TRUE; 3762 M->assembled = PETSC_FALSE; 3763 } 3764 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3765 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3766 ii = aij->i; 3767 jj = aij->j; 3768 aa = aij->a; 3769 for (i=0; i<m; i++) { 3770 row = rstart + i; 3771 nz = ii[i+1] - ii[i]; 3772 cwork = jj; jj += nz; 3773 vwork = aa; aa += nz; 3774 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3775 } 3776 3777 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3778 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3779 *newmat = M; 3780 3781 /* save submatrix used in processor for next request */ 3782 if (call == MAT_INITIAL_MATRIX) { 3783 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3784 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3785 } 3786 PetscFunctionReturn(0); 3787 } 3788 3789 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3790 { 3791 PetscInt m,cstart, cend,j,nnz,i,d; 3792 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3793 const PetscInt *JJ; 3794 PetscScalar *values; 3795 PetscErrorCode ierr; 3796 PetscBool nooffprocentries; 3797 3798 PetscFunctionBegin; 3799 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3800 3801 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3802 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3803 m = B->rmap->n; 3804 cstart = B->cmap->rstart; 3805 cend = B->cmap->rend; 3806 rstart = B->rmap->rstart; 3807 3808 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3809 3810 #if defined(PETSC_USE_DEBUG) 3811 for (i=0; i<m; i++) { 3812 nnz = Ii[i+1]- Ii[i]; 3813 JJ = J + Ii[i]; 3814 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3815 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3816 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3817 } 3818 #endif 3819 3820 for (i=0; i<m; i++) { 3821 nnz = Ii[i+1]- Ii[i]; 3822 JJ = J + Ii[i]; 3823 nnz_max = PetscMax(nnz_max,nnz); 3824 d = 0; 3825 for (j=0; j<nnz; j++) { 3826 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3827 } 3828 d_nnz[i] = d; 3829 o_nnz[i] = nnz - d; 3830 } 3831 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3832 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3833 3834 if (v) values = (PetscScalar*)v; 3835 else { 3836 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3837 } 3838 3839 for (i=0; i<m; i++) { 3840 ii = i + rstart; 3841 nnz = Ii[i+1]- Ii[i]; 3842 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3843 } 3844 nooffprocentries = B->nooffprocentries; 3845 B->nooffprocentries = PETSC_TRUE; 3846 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3847 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3848 B->nooffprocentries = nooffprocentries; 3849 3850 if (!v) { 3851 ierr = PetscFree(values);CHKERRQ(ierr); 3852 } 3853 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3854 PetscFunctionReturn(0); 3855 } 3856 3857 /*@ 3858 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3859 (the default parallel PETSc format). 3860 3861 Collective on MPI_Comm 3862 3863 Input Parameters: 3864 + B - the matrix 3865 . i - the indices into j for the start of each local row (starts with zero) 3866 . j - the column indices for each local row (starts with zero) 3867 - v - optional values in the matrix 3868 3869 Level: developer 3870 3871 Notes: 3872 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3873 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3874 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3875 3876 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3877 3878 The format which is used for the sparse matrix input, is equivalent to a 3879 row-major ordering.. i.e for the following matrix, the input data expected is 3880 as shown 3881 3882 $ 1 0 0 3883 $ 2 0 3 P0 3884 $ ------- 3885 $ 4 5 6 P1 3886 $ 3887 $ Process0 [P0]: rows_owned=[0,1] 3888 $ i = {0,1,3} [size = nrow+1 = 2+1] 3889 $ j = {0,0,2} [size = 3] 3890 $ v = {1,2,3} [size = 3] 3891 $ 3892 $ Process1 [P1]: rows_owned=[2] 3893 $ i = {0,3} [size = nrow+1 = 1+1] 3894 $ j = {0,1,2} [size = 3] 3895 $ v = {4,5,6} [size = 3] 3896 3897 .keywords: matrix, aij, compressed row, sparse, parallel 3898 3899 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3900 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3901 @*/ 3902 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3903 { 3904 PetscErrorCode ierr; 3905 3906 PetscFunctionBegin; 3907 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3908 PetscFunctionReturn(0); 3909 } 3910 3911 /*@C 3912 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3913 (the default parallel PETSc format). For good matrix assembly performance 3914 the user should preallocate the matrix storage by setting the parameters 3915 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3916 performance can be increased by more than a factor of 50. 3917 3918 Collective on MPI_Comm 3919 3920 Input Parameters: 3921 + B - the matrix 3922 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3923 (same value is used for all local rows) 3924 . d_nnz - array containing the number of nonzeros in the various rows of the 3925 DIAGONAL portion of the local submatrix (possibly different for each row) 3926 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3927 The size of this array is equal to the number of local rows, i.e 'm'. 
3928            For matrices that will be factored, you must leave room for (and set)
3929            the diagonal entry even if it is zero.
3930 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3931            submatrix (same value is used for all local rows).
3932 -  o_nnz - array containing the number of nonzeros in the various rows of the
3933            OFF-DIAGONAL portion of the local submatrix (possibly different for
3934            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3935            structure. The size of this array is equal to the number
3936            of local rows, i.e., 'm'.
3937 
3938    If the *_nnz parameter is given then the *_nz parameter is ignored.
3939 
3940    The AIJ format (also called the Yale sparse matrix format or
3941    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3942    storage.  The stored row and column indices begin with zero.
3943    See Users-Manual: ch_mat for details.
3944 
3945    The parallel matrix is partitioned such that the first m0 rows belong to
3946    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3947    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3948 
3949    The DIAGONAL portion of the local submatrix of a processor can be defined
3950    as the submatrix which is obtained by extracting the part corresponding to
3951    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3952    first row that belongs to the processor, r2 is the last row belonging to
3953    this processor, and c1-c2 is the range of indices of the local part of a
3954    vector suitable for applying the matrix to. This is an mxn matrix. In the
3955    common case of a square matrix, the row and column ranges are the same and
3956    the DIAGONAL part is also square. The remaining portion of the local
3957    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3958 
3959    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3960 
3961    You can call MatGetInfo() to get information on how effective the preallocation was;
3962    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
3963    You can also run with the option -info and look for messages with the string
3964    malloc in them to see if additional memory allocation was needed.
3965 
3966    Example usage:
3967 
3968    Consider the following 8x8 matrix with 34 non-zero values, that is
3969    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3970    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3971    as follows:
3972 
3973 .vb
3974             1  2  0  |  0  3  0  |  0  4
3975     Proc0   0  5  6  |  7  0  0  |  8  0
3976             9  0 10  | 11  0  0  | 12  0
3977     -------------------------------------
3978            13  0 14  | 15 16 17  |  0  0
3979     Proc1   0 18  0  | 19 20 21  |  0  0
3980             0  0  0  | 22 23  0  | 24  0
3981     -------------------------------------
3982     Proc2  25 26 27  |  0  0 28  | 29  0
3983            30  0  0  | 31 32 33  |  0 34
3984 .ve
3985 
3986    This can be represented as a collection of submatrices as:
3987 
3988 .vb
3989       A B C
3990       D E F
3991       G H I
3992 .ve
3993 
3994    Where the submatrices A,B,C are owned by proc0, D,E,F are
3995    owned by proc1, G,H,I are owned by proc2.
3996 
3997    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3998    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3999    The 'M','N' parameters are 8,8, and have the same values on all procs.
4000 
4001    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4002    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4003    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
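   As a usage sketch (not a complete program; comm, A, m,n,M,N and the nonzero
   counts d_nnz/o_nnz are illustrative placeholders, with the counts discussed
   below), this routine is normally reached through the standard construction
   sequence:

.vb
   MatCreate(comm,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A,m,n,M,N);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);   /* or scalar d_nz/o_nz with NULL arrays */
.ve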
4004    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4005    part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ
4006    matrix and [DF] as another SeqAIJ matrix.
4007 
4008    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4009    allocated for every row of the local diagonal submatrix, and o_nz
4010    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4011    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4012    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4013    In this case, the values of d_nz,o_nz are:
4014 .vb
4015      proc0 : d_nz = 2, o_nz = 2
4016      proc1 : d_nz = 3, o_nz = 2
4017      proc2 : d_nz = 1, o_nz = 4
4018 .ve
4019    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4020    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4021    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4022    34 values.
4023 
4024    When the d_nnz, o_nnz parameters are specified, the storage is specified
4025    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4026    In the above case the values for d_nnz,o_nnz are:
4027 .vb
4028      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4029      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4030      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4031 .ve
4032    Here the space allocated is the sum of all the above values, i.e., 34, and
4033    hence the preallocation is exact.
4034 
4035    Level: intermediate
4036 
4037 .keywords: matrix, aij, compressed row, sparse, parallel
4038 
4039 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4040           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4041 @*/
4042 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4043 {
4044   PetscErrorCode ierr;
4045 
4046   PetscFunctionBegin;
4047   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4048   PetscValidType(B,1);
4049   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4050   PetscFunctionReturn(0);
4051 }
4052 
4053 /*@
4054    MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4055    in standard CSR format.
4056 
4057    Collective on MPI_Comm
4058 
4059    Input Parameters:
4060 +  comm - MPI communicator
4061 .  m - number of local rows (cannot be PETSC_DECIDE)
4062 .  n - This value should be the same as the local size used in creating the
4063        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4064        calculated if N is given). For square matrices n is almost always m.
4065 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4066 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4067 .  i - row indices (of length m+1; i[0] must be 0)
4068 .  j - column indices
4069 -  a - matrix values
4070 
4071    Output Parameter:
4072 .  mat - the matrix
4073 
4074    Level: intermediate
4075 
4076    Notes:
4077    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4078    thus you CANNOT change the matrix entries by changing the values of a[] after you have
4079    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4080 
4081    The i and j indices are 0 based, and the i indices are offsets into the local j array.
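   As a calling sketch (illustrative only; the variable names are placeholders and
   error checking is omitted), with the two-process layout shown below each process
   would call:

$     /* Process0: m = 2, i = {0,1,3}, j = {0,0,2}, a = {1,2,3} */
$     /* Process1: m = 1, i = {0,3},   j = {0,1,2}, a = {4,5,6} */
$     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);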
4082 4083 The format which is used for the sparse matrix input, is equivalent to a 4084 row-major ordering.. i.e for the following matrix, the input data expected is 4085 as shown 4086 4087 $ 1 0 0 4088 $ 2 0 3 P0 4089 $ ------- 4090 $ 4 5 6 P1 4091 $ 4092 $ Process0 [P0]: rows_owned=[0,1] 4093 $ i = {0,1,3} [size = nrow+1 = 2+1] 4094 $ j = {0,0,2} [size = 3] 4095 $ v = {1,2,3} [size = 3] 4096 $ 4097 $ Process1 [P1]: rows_owned=[2] 4098 $ i = {0,3} [size = nrow+1 = 1+1] 4099 $ j = {0,1,2} [size = 3] 4100 $ v = {4,5,6} [size = 3] 4101 4102 .keywords: matrix, aij, compressed row, sparse, parallel 4103 4104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4105 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4106 @*/ 4107 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4108 { 4109 PetscErrorCode ierr; 4110 4111 PetscFunctionBegin; 4112 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4113 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4114 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4115 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4116 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4117 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4118 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4119 PetscFunctionReturn(0); 4120 } 4121 4122 /*@C 4123 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4124 (the default parallel PETSc format). For good matrix assembly performance 4125 the user should preallocate the matrix storage by setting the parameters 4126 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4127 performance can be increased by more than a factor of 50. 4128 4129 Collective on MPI_Comm 4130 4131 Input Parameters: 4132 + comm - MPI communicator 4133 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4134 This value should be the same as the local size used in creating the 4135 y vector for the matrix-vector product y = Ax. 4136 . n - This value should be the same as the local size used in creating the 4137 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4138 calculated if N is given) For square matrices n is almost always m. 4139 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4140 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4141 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4142 (same value is used for all local rows) 4143 . d_nnz - array containing the number of nonzeros in the various rows of the 4144 DIAGONAL portion of the local submatrix (possibly different for each row) 4145 or NULL, if d_nz is used to specify the nonzero structure. 4146 The size of this array is equal to the number of local rows, i.e 'm'. 4147 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4148 submatrix (same value is used for all local rows). 4149 - o_nnz - array containing the number of nonzeros in the various rows of the 4150 OFF-DIAGONAL portion of the local submatrix (possibly different for 4151 each row) or NULL, if o_nz is used to specify the nonzero 4152 structure. 
The size of this array is equal to the number
4153            of local rows, i.e., 'm'.
4154 
4155    Output Parameter:
4156 .  A - the matrix
4157 
4158    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4159    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4160    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4161 
4162    Notes:
4163    If the *_nnz parameter is given then the *_nz parameter is ignored.
4164 
4165    The m,n,M,N parameters specify the size of the matrix and its partitioning across
4166    processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4167    storage requirements for this matrix.
4168 
4169    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4170    processor then it must be used on all processors that share the object for
4171    that argument.
4172 
4173    The user MUST specify either the local or global matrix dimensions
4174    (possibly both).
4175 
4176    The parallel matrix is partitioned across processors such that the
4177    first m0 rows belong to process 0, the next m1 rows belong to
4178    process 1, the next m2 rows belong to process 2, etc., where
4179    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4180    values corresponding to an [m x N] submatrix.
4181 
4182    The columns are logically partitioned with the n0 columns belonging
4183    to the 0th partition, the next n1 columns belonging to the next
4184    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4185 
4186    The DIAGONAL portion of the local submatrix on any given processor
4187    is the submatrix corresponding to the rows and columns m,n
4188    corresponding to the given processor, i.e., the diagonal matrix on
4189    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4190    etc. The remaining portion of the local submatrix [m x (N-n)]
4191    constitutes the OFF-DIAGONAL portion. The example below better
4192    illustrates this concept.
4193 
4194    For a square global matrix we define each processor's diagonal portion
4195    to be its local rows and the corresponding columns (a square submatrix);
4196    each processor's off-diagonal portion encompasses the remainder of the
4197    local matrix (a rectangular submatrix).
4198 
4199    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4200 
4201    When calling this routine with a single process communicator, a matrix of
4202    type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4203    type of communicator, use the construction mechanism
4204 .vb
4205    MatCreate(...,&A);
4206    MatSetType(A,MATMPIAIJ);
4207    MatSetSizes(A, m,n,M,N);
4208    MatMPIAIJSetPreallocation(A,...);
4209 .ve
4210 
4213    By default, this format uses inodes (identical nodes) when possible.
4214    We search for consecutive rows with the same nonzero structure, thereby
4215    reusing matrix information to achieve increased efficiency.
4216 
4217    Options Database Keys:
4218 +  -mat_no_inode  - Do not use inodes
4219 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4220 
4223    Example usage:
4224 
4225    Consider the following 8x8 matrix with 34 non-zero values, that is
4226    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4227    proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
4228    as follows:
4229 
4230 .vb
4231             1  2  0  |  0  3  0  |  0  4
4232     Proc0   0  5  6  |  7  0  0  |  8  0
4233             9  0 10  | 11  0  0  | 12  0
4234     -------------------------------------
4235            13  0 14  | 15 16 17  |  0  0
4236     Proc1   0 18  0  | 19 20 21  |  0  0
4237             0  0  0  | 22 23  0  | 24  0
4238     -------------------------------------
4239     Proc2  25 26 27  |  0  0 28  | 29  0
4240            30  0  0  | 31 32 33  |  0 34
4241 .ve
4242 
4243    This can be represented as a collection of submatrices as
4244 
4245 .vb
4246       A B C
4247       D E F
4248       G H I
4249 .ve
4250 
4251    Where the submatrices A,B,C are owned by proc0, D,E,F are
4252    owned by proc1, G,H,I are owned by proc2.
4253 
4254    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4255    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4256    The 'M','N' parameters are 8,8, and have the same values on all procs.
4257 
4258    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4259    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4260    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4261    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4262    part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ
4263    matrix and [DF] as another SeqAIJ matrix.
4264 
4265    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4266    allocated for every row of the local diagonal submatrix, and o_nz
4267    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4268    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4269    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4270    In this case, the values of d_nz,o_nz are
4271 .vb
4272      proc0 : d_nz = 2, o_nz = 2
4273      proc1 : d_nz = 3, o_nz = 2
4274      proc2 : d_nz = 1, o_nz = 4
4275 .ve
4276    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4277    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4278    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4279    34 values.
4280 
4281    When the d_nnz, o_nnz parameters are specified, the storage is specified
4282    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4283    In the above case the values for d_nnz,o_nnz are
4284 .vb
4285      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4286      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4287      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4288 .ve
4289    Here the space allocated is the sum of all the above values, i.e., 34, and
4290    hence the preallocation is exact.
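   For instance, a sketch of the corresponding call on proc1 (illustrative only;
   error checking omitted) would be:

.vb
   PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};   /* proc1 values from the table above */
   MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve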
4291 4292 Level: intermediate 4293 4294 .keywords: matrix, aij, compressed row, sparse, parallel 4295 4296 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4297 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4298 @*/ 4299 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4300 { 4301 PetscErrorCode ierr; 4302 PetscMPIInt size; 4303 4304 PetscFunctionBegin; 4305 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4306 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4307 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4308 if (size > 1) { 4309 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4310 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4311 } else { 4312 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4313 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4314 } 4315 PetscFunctionReturn(0); 4316 } 4317 4318 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4319 { 4320 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4321 PetscBool flg; 4322 PetscErrorCode ierr; 4323 4324 PetscFunctionBegin; 4325 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4326 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4327 if (Ad) *Ad = a->A; 4328 if (Ao) *Ao = a->B; 4329 if (colmap) *colmap = a->garray; 4330 PetscFunctionReturn(0); 4331 } 4332 4333 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4334 { 4335 PetscErrorCode ierr; 4336 PetscInt m,N,i,rstart,nnz,Ii; 4337 PetscInt *indx; 4338 PetscScalar *values; 4339 4340 PetscFunctionBegin; 4341 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4342 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4343 PetscInt *dnz,*onz,sum,bs,cbs; 4344 4345 if (n == PETSC_DECIDE) { 4346 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4347 } 4348 /* Check sum(n) = N */ 4349 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4350 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4351 4352 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4353 rstart -= m; 4354 4355 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4356 for (i=0; i<m; i++) { 4357 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4358 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4359 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4360 } 4361 4362 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4363 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4364 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4365 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4366 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4367 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4368 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4369 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4370 } 4371 4372 /* numeric phase */ 4373 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4374 for (i=0; i<m; i++) { 4375 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4376 Ii = i + rstart; 4377 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4378 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4379 } 4380 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4381 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4382 PetscFunctionReturn(0); 4383 } 4384 4385 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4386 { 4387 PetscErrorCode ierr; 4388 PetscMPIInt rank; 4389 PetscInt m,N,i,rstart,nnz; 4390 size_t len; 4391 const PetscInt *indx; 4392 PetscViewer out; 4393 char *name; 4394 Mat B; 4395 const PetscScalar *values; 4396 4397 PetscFunctionBegin; 4398 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4399 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4400 /* Should this be the type of the diagonal block of A? */ 4401 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4402 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4403 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4404 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4405 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4406 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4407 for (i=0; i<m; i++) { 4408 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4409 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4410 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4411 } 4412 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4413 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4414 4415 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4416 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4417 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4418 sprintf(name,"%s.%d",outfile,rank); 4419 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4420 ierr = PetscFree(name);CHKERRQ(ierr); 4421 ierr = MatView(B,out);CHKERRQ(ierr); 4422 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4423 ierr = MatDestroy(&B);CHKERRQ(ierr); 4424 PetscFunctionReturn(0); 4425 } 4426 4427 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4428 { 4429 PetscErrorCode ierr; 4430 Mat_Merge_SeqsToMPI *merge; 4431 PetscContainer container; 4432 4433 PetscFunctionBegin; 4434 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4435 if (container) { 4436 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4437 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4438 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4439 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4440 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4441 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4442 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4443 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4444 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4445 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4446 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4447 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4448 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4449 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4450 ierr = PetscFree(merge);CHKERRQ(ierr); 4451 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4452 } 4453 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4454 PetscFunctionReturn(0); 4455 } 4456 4457 #include <../src/mat/utils/freespace.h> 4458 #include <petscbt.h> 4459 4460 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4461 { 4462 PetscErrorCode ierr; 4463 MPI_Comm comm; 4464 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4465 PetscMPIInt 
size,rank,taga,*len_s; 4466 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4467 PetscInt proc,m; 4468 PetscInt **buf_ri,**buf_rj; 4469 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4470 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4471 MPI_Request *s_waits,*r_waits; 4472 MPI_Status *status; 4473 MatScalar *aa=a->a; 4474 MatScalar **abuf_r,*ba_i; 4475 Mat_Merge_SeqsToMPI *merge; 4476 PetscContainer container; 4477 4478 PetscFunctionBegin; 4479 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4480 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4481 4482 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4483 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4484 4485 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4486 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4487 4488 bi = merge->bi; 4489 bj = merge->bj; 4490 buf_ri = merge->buf_ri; 4491 buf_rj = merge->buf_rj; 4492 4493 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4494 owners = merge->rowmap->range; 4495 len_s = merge->len_s; 4496 4497 /* send and recv matrix values */ 4498 /*-----------------------------*/ 4499 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4500 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4501 4502 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4503 for (proc=0,k=0; proc<size; proc++) { 4504 if (!len_s[proc]) continue; 4505 i = owners[proc]; 4506 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4507 k++; 4508 } 4509 4510 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4511 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4512 ierr = PetscFree(status);CHKERRQ(ierr); 4513 4514 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4515 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4516 4517 /* insert mat values of mpimat */ 4518 /*----------------------------*/ 4519 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4520 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4521 4522 for (k=0; k<merge->nrecv; k++) { 4523 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4524 nrows = *(buf_ri_k[k]); 4525 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4526 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4527 } 4528 4529 /* set values of ba */ 4530 m = merge->rowmap->n; 4531 for (i=0; i<m; i++) { 4532 arow = owners[rank] + i; 4533 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4534 bnzi = bi[i+1] - bi[i]; 4535 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4536 4537 /* add local non-zero vals of this proc's seqmat into ba */ 4538 anzi = ai[arow+1] - ai[arow]; 4539 aj = a->j + ai[arow]; 4540 aa = a->a + ai[arow]; 4541 nextaj = 0; 4542 for (j=0; nextaj<anzi; j++) { 4543 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4544 ba_i[j] += aa[nextaj++]; 4545 } 4546 } 4547 4548 /* add received vals into ba */ 4549 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4550 /* i-th row */ 4551 if (i == *nextrow[k]) { 4552 anzi = *(nextai[k]+1) - *nextai[k]; 4553 aj = buf_rj[k] + *(nextai[k]); 4554 aa = abuf_r[k] + *(nextai[k]); 4555 nextaj = 0; 4556 for (j=0; nextaj<anzi; j++) { 4557 if (*(bj_i + j) == aj[nextaj]) { /* bcol
== acol */ 4558 ba_i[j] += aa[nextaj++]; 4559 } 4560 } 4561 nextrow[k]++; nextai[k]++; 4562 } 4563 } 4564 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4565 } 4566 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4567 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4568 4569 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4570 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4571 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4572 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4573 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4574 PetscFunctionReturn(0); 4575 } 4576 4577 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4578 { 4579 PetscErrorCode ierr; 4580 Mat B_mpi; 4581 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4582 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4583 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4584 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4585 PetscInt len,proc,*dnz,*onz,bs,cbs; 4586 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4587 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4588 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4589 MPI_Status *status; 4590 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4591 PetscBT lnkbt; 4592 Mat_Merge_SeqsToMPI *merge; 4593 PetscContainer container; 4594 4595 PetscFunctionBegin; 4596 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4597 4598 /* make sure it is a PETSc comm */ 4599 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4601 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4602 4603 ierr = PetscNew(&merge);CHKERRQ(ierr); 4604 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4605 4606 /* determine row ownership */ 4607 /*---------------------------------------------------------*/ 4608 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4609 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4610 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4611 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4612 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4613 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4614 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4615 4616 m = merge->rowmap->n; 4617 owners = merge->rowmap->range; 4618 4619 /* determine the number of messages to send, their lengths */ 4620 /*---------------------------------------------------------*/ 4621 len_s = merge->len_s; 4622 4623 len = 0; /* length of buf_si[] */ 4624 merge->nsend = 0; 4625 for (proc=0; proc<size; proc++) { 4626 len_si[proc] = 0; 4627 if (proc == rank) { 4628 len_s[proc] = 0; 4629 } else { 4630 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4631 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4632 } 4633 if (len_s[proc]) { 4634 merge->nsend++; 4635 nrows = 0; 4636 for (i=owners[proc]; i<owners[proc+1]; i++) { 4637 if (ai[i+1] > ai[i]) nrows++; 4638 } 4639 len_si[proc] = 2*(nrows+1); 4640 len += len_si[proc]; 4641 } 4642 } 4643 4644 /* determine the number and length of messages to receive for ij-structure */ 4645 /*-------------------------------------------------------------------------*/ 4646 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4647 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4648 4649 /* post the Irecv of j-structure */ 4650 /*-------------------------------*/ 4651 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4652 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4653 4654 /* post the Isend of j-structure */ 4655 /*--------------------------------*/ 4656 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4657 4658 for (proc=0, k=0; proc<size; proc++) { 4659 if (!len_s[proc]) continue; 4660 i = owners[proc]; 4661 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4662 k++; 4663 } 4664 4665 /* receives and sends of j-structure are complete */ 4666 /*------------------------------------------------*/ 4667 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4668 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4669 4670 /* send and recv i-structure */ 4671 /*---------------------------*/ 4672 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4673 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4674 4675 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4676 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4677 for (proc=0,k=0; proc<size; proc++) { 4678 if (!len_s[proc]) continue; 4679 /* form outgoing message for i-structure: 4680 buf_si[0]: nrows to be sent 4681 [1:nrows]: row index (global) 4682 [nrows+1:2*nrows+1]: i-structure index 4683 */ 4684 /*-------------------------------------------*/ 4685 nrows = len_si[proc]/2 - 1; 4686 buf_si_i = buf_si + nrows+1; 4687 buf_si[0] = nrows; 4688 buf_si_i[0] = 0; 4689 nrows = 0; 4690 for (i=owners[proc]; i<owners[proc+1]; i++) { 4691 anzi = ai[i+1] - ai[i]; 4692 if (anzi) { 4693 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4694 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4695 nrows++; 4696 } 4697 } 4698 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4699 k++; 4700 buf_si += len_si[proc]; 4701 } 4702 4703 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4704 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4705 4706 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4707 for (i=0; i<merge->nrecv; i++) { 4708 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4709 } 4710 4711 ierr = PetscFree(len_si);CHKERRQ(ierr); 4712 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4713 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4714 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4715 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4716 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4717 ierr = PetscFree(status);CHKERRQ(ierr); 4718 4719 /* compute a local seq matrix in each processor */ 4720 /*----------------------------------------------*/ 4721 /* allocate bi array and free space for accumulating nonzero column info */ 4722 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4723 bi[0] = 0; 4724 4725 /* create and initialize a linked list */ 4726 nlnk = N+1; 4727 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4728 4729 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4730 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4731 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4732 4733 current_space = free_space; 4734 4735 /* determine symbolic info for each local row */ 4736 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4737 4738 for (k=0; k<merge->nrecv; k++) { 4739 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4740 nrows = *buf_ri_k[k]; 4741 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4742 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4743 } 4744 4745 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4746 len = 0; 4747 for (i=0; i<m; i++) { 4748 bnzi = 0; 4749 /* add local non-zero cols of this proc's seqmat into lnk */ 4750 arow = owners[rank] + i; 4751 anzi = ai[arow+1] - ai[arow]; 4752 aj = a->j + ai[arow]; 4753 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4754 bnzi += nlnk; 4755 /* add received col data into lnk */ 4756 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4757 if (i == *nextrow[k]) { /* i-th row */ 4758 anzi = *(nextai[k]+1) - *nextai[k]; 4759 aj = buf_rj[k] + *nextai[k]; 4760 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4761 bnzi += nlnk; 4762 nextrow[k]++; nextai[k]++; 4763 } 4764 } 4765 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4766 4767 /* if free space is not available, make more free space */ 4768 if (current_space->local_remaining<bnzi) { 4769 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4770 nspacedouble++; 4771 } 4772 /* copy data into free space, then initialize lnk */ 4773 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4774 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4775 4776 current_space->array += bnzi; 4777 current_space->local_used += bnzi; 4778 current_space->local_remaining -= bnzi; 4779 4780 bi[i+1] = bi[i] + bnzi; 4781 } 4782 4783 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4784 4785 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4786 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4787 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4788 4789 /* create symbolic parallel matrix B_mpi */ 4790 /*---------------------------------------*/ 4791 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4792 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4793 if (n==PETSC_DECIDE) { 4794 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4795 } else { 4796 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4797 } 4798 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4799 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4800 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4801 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4802 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4803 4804 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4805 B_mpi->assembled = PETSC_FALSE; 4806 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4807 merge->bi = bi; 4808 merge->bj = bj; 4809 merge->buf_ri = buf_ri; 4810 merge->buf_rj = buf_rj; 4811 merge->coi = NULL; 4812 merge->coj = NULL; 4813 merge->owners_co = NULL; 4814 4815 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4816 4817 /* attach the
supporting struct to B_mpi for reuse */ 4818 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4819 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4820 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4821 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4822 *mpimat = B_mpi; 4823 4824 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4825 PetscFunctionReturn(0); 4826 } 4827 4828 /*@C 4829 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4830 matrices from each processor 4831 4832 Collective on MPI_Comm 4833 4834 Input Parameters: 4835 + comm - the communicator the parallel matrix will live on 4836 . seqmat - the input sequential matrix 4837 . m - number of local rows (or PETSC_DECIDE) 4838 . n - number of local columns (or PETSC_DECIDE) 4839 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4840 4841 Output Parameter: 4842 . mpimat - the parallel matrix generated 4843 4844 Level: advanced 4845 4846 Notes: 4847 The dimensions of the sequential matrix in each processor MUST be the same. 4848 The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be 4849 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4850 @*/ 4851 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4852 { 4853 PetscErrorCode ierr; 4854 PetscMPIInt size; 4855 4856 PetscFunctionBegin; 4857 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4858 if (size == 1) { 4859 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4860 if (scall == MAT_INITIAL_MATRIX) { 4861 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4862 } else { 4863 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4864 } 4865 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4866 PetscFunctionReturn(0); 4867 } 4868 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4869 if (scall == MAT_INITIAL_MATRIX) { 4870 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4871 } 4872 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4873 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4874 PetscFunctionReturn(0); 4875 } 4876 4877 /*@ 4878 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4879 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4880 with MatGetSize(). 4881 4882 Not Collective 4883 4884 Input Parameters: 4885 + A - the matrix 4886 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4887 4888 Output Parameter: 4889 .
A_loc - the local sequential matrix generated 4890 4891 Level: developer 4892 4893 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4894 4895 @*/ 4896 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4897 { 4898 PetscErrorCode ierr; 4899 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4900 Mat_SeqAIJ *mat,*a,*b; 4901 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4902 MatScalar *aa,*ba,*cam; 4903 PetscScalar *ca; 4904 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4905 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4906 PetscBool match; 4907 MPI_Comm comm; 4908 PetscMPIInt size; 4909 4910 PetscFunctionBegin; 4911 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4912 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4913 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4914 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4915 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4916 4917 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4918 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4919 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4920 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4921 aa = a->a; ba = b->a; 4922 if (scall == MAT_INITIAL_MATRIX) { 4923 if (size == 1) { 4924 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4925 PetscFunctionReturn(0); 4926 } 4927 4928 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4929 ci[0] = 0; 4930 for (i=0; i<am; i++) { 4931 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4932 } 4933 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4934 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4935 k = 0; 4936 for (i=0; i<am; i++) { 4937 ncols_o = bi[i+1] - bi[i]; 4938 ncols_d = ai[i+1] - ai[i]; 4939 /* off-diagonal portion of A */ 4940 for (jo=0; jo<ncols_o; jo++) { 4941 col = cmap[*bj]; 4942 if (col >= cstart) break; 4943 cj[k] = col; bj++; 4944 ca[k++] = *ba++; 4945 } 4946 /* diagonal portion of A */ 4947 for (j=0; j<ncols_d; j++) { 4948 cj[k] = cstart + *aj++; 4949 ca[k++] = *aa++; 4950 } 4951 /* off-diagonal portion of A */ 4952 for (j=jo; j<ncols_o; j++) { 4953 cj[k] = cmap[*bj++]; 4954 ca[k++] = *ba++; 4955 } 4956 } 4957 /* put together the new matrix */ 4958 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4959 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4960 /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 4961 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4962 mat->free_a = PETSC_TRUE; 4963 mat->free_ij = PETSC_TRUE; 4964 mat->nonew = 0; 4965 } else if (scall == MAT_REUSE_MATRIX) { 4966 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4967 ci = mat->i; cj = mat->j; cam = mat->a; 4968 for (i=0; i<am; i++) { 4969 /* off-diagonal portion of A */ 4970 ncols_o = bi[i+1] - bi[i]; 4971 for (jo=0; jo<ncols_o; jo++) { 4972 col = cmap[*bj]; 4973 if (col >= cstart) break; 4974 *cam++ = *ba++; bj++; 4975 } 4976 /* diagonal portion of A */ 4977 ncols_d = ai[i+1] - ai[i]; 4978 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4979 /* off-diagonal portion of A */ 4980 for (j=jo; j<ncols_o; j++) { 4981 *cam++ = *ba++; bj++; 4982 } 4983 } 4984 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4985 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4986 PetscFunctionReturn(0); 4987 } 4988 4989 /*@C 4990 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4991 4992 Not Collective 4993 4994 Input Parameters: 4995 + A - the matrix 4996 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4997 - row, col - index sets of rows and columns to extract (or NULL) 4998 4999 Output Parameter: 5000 . A_loc - the local sequential matrix generated 5001 5002 Level: developer 5003 5004 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5005 5006 @*/ 5007 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5008 { 5009 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5010 PetscErrorCode ierr; 5011 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5012 IS isrowa,iscola; 5013 Mat *aloc; 5014 PetscBool match; 5015 5016 PetscFunctionBegin; 5017 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5018 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5019 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5020 if (!row) { 5021 start = A->rmap->rstart; end = A->rmap->rend; 5022 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5023 } else { 5024 isrowa = *row; 5025 } 5026 if (!col) { 5027 start = A->cmap->rstart; 5028 cmap = a->garray; 5029 nzA = a->A->cmap->n; 5030 nzB = a->B->cmap->n; 5031 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5032 ncols = 0; 5033 for (i=0; i<nzB; i++) { 5034 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5035 else break; 5036 } 5037 imark = i; 5038 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5039 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5040 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5041 } else { 5042 iscola = *col; 5043 } 5044 if (scall != MAT_INITIAL_MATRIX) { 5045 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5046 aloc[0] = *A_loc; 5047 } 5048 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5049 if (!col) { /* attach global id of condensed columns */ 5050 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5051 } 5052 *A_loc = aloc[0]; 5053 ierr = PetscFree(aloc);CHKERRQ(ierr); 5054 if (!row) { 5055 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5056 } 5057 if (!col) { 5058 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5059 } 5060 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5061 PetscFunctionReturn(0); 5062 } 5063 5064 /*@C 5065 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that correspond to the nonzero columns of local A 5066 5067 Collective on Mat 5068 5069 Input Parameters: 5070 + A,B - the matrices in mpiaij format 5071 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5072 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5073 5074 Output Parameters: 5075 + rowb, colb - index sets of rows and columns of B to extract 5076 - B_seq - the sequential matrix generated 5077 5078 Level: developer 5079 5080 @*/ 5081 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5082 { 5083 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5084 PetscErrorCode ierr; 5085 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5086 IS isrowb,iscolb; 5087 Mat *bseq=NULL; 5088 5089 PetscFunctionBegin; 5090 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5091 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5092 } 5093 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5094 5095 if (scall == MAT_INITIAL_MATRIX) { 5096 start = A->cmap->rstart; 5097 cmap = a->garray; 5098 nzA = a->A->cmap->n; 5099 nzB = a->B->cmap->n; 5100 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5101 ncols = 0; 5102 for (i=0; i<nzB; i++) { /* row < local row index */ 5103 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5104 else break; 5105 } 5106 imark = i; 5107 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5108 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5109 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5110 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5111 } else { 5112 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5113 isrowb = *rowb; iscolb = *colb; 5114 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5115 bseq[0] = *B_seq; 5116 } 5117 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5118 *B_seq = bseq[0]; 5119 ierr = PetscFree(bseq);CHKERRQ(ierr); 5120 if (!rowb) { 5121 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5122 } else { 5123 *rowb = isrowb; 5124 } 5125 if (!colb) { 5126 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5127 } else { 5128 *colb = iscolb; 5129 } 5130 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5131 PetscFunctionReturn(0); 5132 } 5133 5134 /* 5135 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that correspond to the nonzero columns 5136 of the OFF-DIAGONAL portion of local A 5137 5138 Collective on Mat 5139 5140 Input Parameters: 5141 + A,B - the matrices in mpiaij format 5142 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5143 5144 Output Parameters: 5145 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5146 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5148 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5149 5150 Level: developer 5151 5152 */ 5153 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5154 { 5155 VecScatter_MPI_General *gen_to,*gen_from; 5156 PetscErrorCode ierr; 5157 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5158 Mat_SeqAIJ *b_oth; 5159 VecScatter ctx; 5160 MPI_Comm comm; 5161 PetscMPIInt *rprocs,*sprocs,tag,rank; 5162 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5163 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5164 PetscScalar *b_otha,*bufa,*bufA,*vals; 5165 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5166 MPI_Request *rwaits = NULL,*swaits = NULL; 5167 MPI_Status *sstatus,rstatus; 5168 PetscMPIInt jj,size; 5169 VecScatterType type; 5170 PetscBool mpi1; 5171 5172 PetscFunctionBegin; 5173 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5175 5176 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5177 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5178 } 5179 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5180 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5181 5182 if (size == 1) { 5183 startsj_s = NULL; 5184 bufa_ptr = NULL; 5185 *B_oth = NULL; 5186 PetscFunctionReturn(0); 5187 } 5188 5189 ctx = a->Mvctx; 5190 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5191 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5192 if (!mpi1) { 5193 /* a->Mvctx is not of type MPI1, the only VecScatter type implemented for these Mat-Mat ops, 5194 thus create a->Mvctx_mpi1 */ 5195 if (!a->Mvctx_mpi1) { 5196 a->Mvctx_mpi1_flg = PETSC_TRUE; 5197 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5198 } 5199 ctx = a->Mvctx_mpi1; 5200 } 5201 tag = ((PetscObject)ctx)->tag; 5202 5203 gen_to = (VecScatter_MPI_General*)ctx->todata; 5204 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5205 nrecvs = gen_from->n; 5206 nsends = gen_to->n; 5207 5208 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5209 srow = gen_to->indices; /* local row index to be sent */ 5210 sstarts = gen_to->starts; 5211 sprocs = gen_to->procs; 5212 sstatus = gen_to->sstatus; 5213 sbs = gen_to->bs; 5214 rstarts = gen_from->starts; 5215 rprocs = gen_from->procs; 5216 rbs = gen_from->bs; 5217 5218 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5219 if (scall == MAT_INITIAL_MATRIX) { 5220 /* i-array */ 5221 /*---------*/ 5222 /* post receives */ 5223 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5224 for (i=0; i<nrecvs; i++) { 5225 rowlen = rvalues + rstarts[i]*rbs; 5226 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5227 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5228 } 5229 5230 /* pack the outgoing message */ 5231 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5232 5233 sstartsj[0] = 0; 5234 rstartsj[0] = 0; 5235 len = 0; /* total length of j or a array to be sent */ 5236 k = 0; 5237 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5238 for (i=0; i<nsends; i++) { 5239 rowlen = svalues + sstarts[i]*sbs; 5240
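/* rowlen[] overlays the segment of svalues that carries, for each (block) row headed to process sprocs[i], the number of nonzeros in that row */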
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5241 for (j=0; j<nrows; j++) { 5242 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5243 for (l=0; l<sbs; l++) { 5244 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5245 5246 rowlen[j*sbs+l] = ncols; 5247 5248 len += ncols; 5249 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5250 } 5251 k++; 5252 } 5253 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5254 5255 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5256 } 5257 /* recvs and sends of i-array are completed */ 5258 i = nrecvs; 5259 while (i--) { 5260 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5261 } 5262 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5263 ierr = PetscFree(svalues);CHKERRQ(ierr); 5264 5265 /* allocate buffers for sending j and a arrays */ 5266 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5267 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5268 5269 /* create i-array of B_oth */ 5270 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5271 5272 b_othi[0] = 0; 5273 len = 0; /* total length of j or a array to be received */ 5274 k = 0; 5275 for (i=0; i<nrecvs; i++) { 5276 rowlen = rvalues + rstarts[i]*rbs; 5277 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5278 for (j=0; j<nrows; j++) { 5279 b_othi[k+1] = b_othi[k] + rowlen[j]; 5280 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5281 k++; 5282 } 5283 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5284 } 5285 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5286 5287 /* allocate space for j and a arrays of B_oth */ 5288 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5289 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5290 5291 /* j-array */ 5292 /*---------*/ 5293 /* post receives of j-array */ 5294 for (i=0; i<nrecvs; i++) { 5295 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5296 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5297 } 5298 5299 /* pack the outgoing message j-array */ 5300 k = 0; 5301 for (i=0; i<nsends; i++) { 5302 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5303 bufJ = bufj+sstartsj[i]; 5304 for (j=0; j<nrows; j++) { 5305 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5306 for (ll=0; ll<sbs; ll++) { 5307 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5308 for (l=0; l<ncols; l++) { 5309 *bufJ++ = cols[l]; 5310 } 5311 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5312 } 5313 } 5314 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5315 } 5316 5317 /* recvs and sends of j-array are completed */ 5318 i = nrecvs; 5319 while (i--) { 5320 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5321 } 5322 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5323 } else if (scall == MAT_REUSE_MATRIX) { 5324 sstartsj = *startsj_s; 5325 rstartsj = *startsj_r; 5326 bufa = *bufa_ptr; 5327 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5328 b_otha = b_oth->a; 5329 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5330 5331 /* a-array */ 5332 /*---------*/ 5333 /* post receives of a-array */ 5334 for (i=0; i<nrecvs; i++) { 5335 nrows = rstartsj[i+1]-rstartsj[i]; /* length
of the msg received */ 5336 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5337 } 5338 5339 /* pack the outgoing message a-array */ 5340 k = 0; 5341 for (i=0; i<nsends; i++) { 5342 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5343 bufA = bufa+sstartsj[i]; 5344 for (j=0; j<nrows; j++) { 5345 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5346 for (ll=0; ll<sbs; ll++) { 5347 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5348 for (l=0; l<ncols; l++) { 5349 *bufA++ = vals[l]; 5350 } 5351 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5352 } 5353 } 5354 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5355 } 5356 /* recvs and sends of a-array are completed */ 5357 i = nrecvs; 5358 while (i--) { 5359 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5360 } 5361 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5362 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5363 5364 if (scall == MAT_INITIAL_MATRIX) { 5365 /* put together the new matrix */ 5366 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5367 5368 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5369 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5370 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5371 b_oth->free_a = PETSC_TRUE; 5372 b_oth->free_ij = PETSC_TRUE; 5373 b_oth->nonew = 0; 5374 5375 ierr = PetscFree(bufj);CHKERRQ(ierr); 5376 if (!startsj_s || !bufa_ptr) { 5377 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5378 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5379 } else { 5380 *startsj_s = sstartsj; 5381 *startsj_r = rstartsj; 5382 *bufa_ptr = bufa; 5383 } 5384 } 5385 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5386 PetscFunctionReturn(0); 5387 } 5388 5389 /*@C 5390 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5391 5392 Not Collective 5393 5394 Input Parameters: 5395 . A - The matrix in mpiaij format 5396 5397 Output Parameter: 5398 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5399 . 
colmap - A map from global column index to local index into lvec 5400 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5401 5402 Level: developer 5403 5404 @*/ 5405 #if defined(PETSC_USE_CTABLE) 5406 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5407 #else 5408 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5409 #endif 5410 { 5411 Mat_MPIAIJ *a; 5412 5413 PetscFunctionBegin; 5414 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5415 PetscValidPointer(lvec, 2); 5416 PetscValidPointer(colmap, 3); 5417 PetscValidPointer(multScatter, 4); 5418 a = (Mat_MPIAIJ*) A->data; 5419 if (lvec) *lvec = a->lvec; 5420 if (colmap) *colmap = a->colmap; 5421 if (multScatter) *multScatter = a->Mvctx; 5422 PetscFunctionReturn(0); 5423 } 5424 5425 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5426 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5427 #if defined(PETSC_HAVE_MKL_SPARSE) 5428 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5429 #endif 5430 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5431 #if defined(PETSC_HAVE_ELEMENTAL) 5432 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5433 #endif 5434 #if defined(PETSC_HAVE_HYPRE) 5435 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5436 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5437 #endif 5438 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5439 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5440 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5441 5442 /* 5443 Computes (B'*A')' since computing B*A directly is untenable 5444 5445 n p p 5446 ( ) ( ) ( ) 5447 m ( A ) * n ( B ) = m ( C ) 5448 ( ) ( ) ( ) 5449 5450 */ 5451 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5452 { 5453 PetscErrorCode ierr; 5454 Mat At,Bt,Ct; 5455 5456 PetscFunctionBegin; 5457 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5458 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5459 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5460 ierr = MatDestroy(&At);CHKERRQ(ierr); 5461 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5462 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5463 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5464 PetscFunctionReturn(0); 5465 } 5466 5467 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5468 { 5469 PetscErrorCode ierr; 5470 PetscInt m=A->rmap->n,n=B->cmap->n; 5471 Mat Cmat; 5472 5473 PetscFunctionBegin; 5474 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5475 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5476 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5477 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5478 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5479 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5480 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5481 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5482 5483 Cmat->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIAIJ; 5484 5485 *C = Cmat; 5486 PetscFunctionReturn(0); 5487 } 5488 5489 /* ----------------------------------------------------------------*/ 5490 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5491 { 5492 PetscErrorCode ierr; 5493 5494 PetscFunctionBegin; 5495 if (scall == MAT_INITIAL_MATRIX) { 5496 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5497 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5498 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5499 } 5500 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5501 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5502 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5503 PetscFunctionReturn(0); 5504 } 5505 5506 /*MC 5507 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5508 5509 Options Database Keys: 5510 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5511 5512 Level: beginner 5513 5514 .seealso: MatCreateAIJ() 5515 M*/ 5516 5517 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5518 { 5519 Mat_MPIAIJ *b; 5520 PetscErrorCode ierr; 5521 PetscMPIInt size; 5522 5523 PetscFunctionBegin; 5524 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5525 5526 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5527 B->data = (void*)b; 5528 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5529 B->assembled = PETSC_FALSE; 5530 B->insertmode = NOT_SET_VALUES; 5531 b->size = size; 5532 5533 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5534 5535 /* build cache for off array entries formed */ 5536 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5537 5538 b->donotstash = PETSC_FALSE; 5539 b->colmap = 0; 5540 b->garray = 0; 5541 b->roworiented = PETSC_TRUE; 5542 5543 /* stuff used for matrix vector multiply */ 5544 b->lvec = NULL; 5545 b->Mvctx = NULL; 5546 5547 /* stuff for MatGetRow() */ 5548 b->rowindices = 0; 5549 b->rowvalues = 0; 5550 b->getrowactive = PETSC_FALSE; 5551 5552 /* flexible pointer used in CUSP/CUSPARSE classes */ 5553 b->spptr = NULL; 5554 5555 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5556 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5557 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5558 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5559 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5560 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5561 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5562 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5563 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5564 #if defined(PETSC_HAVE_MKL_SPARSE) 5565 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5566 #endif 5567 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5568 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5569 #if defined(PETSC_HAVE_ELEMENTAL) 5570 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5571 #endif 5572 #if defined(PETSC_HAVE_HYPRE) 5573 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5574 #endif 5575 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5577 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5578 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5579 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5580 #if defined(PETSC_HAVE_HYPRE) 5581 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5582 #endif 5583 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5584 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5585 PetscFunctionReturn(0); 5586 } 5587 5588 /*@C 5589 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5590 and "off-diagonal" part of the matrix in CSR format. 5591 5592 Collective on MPI_Comm 5593 5594 Input Parameters: 5595 + comm - MPI communicator 5596 . m - number of local rows (Cannot be PETSC_DECIDE) 5597 . n - This value should be the same as the local size used in creating the 5598 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5599 calculated if N is given) For square matrices n is almost always m. 5600 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5601 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5602 . i - row indices for "diagonal" portion of matrix 5603 . j - column indices 5604 . a - matrix values 5605 . oi - row indices for "off-diagonal" portion of matrix 5606 . oj - column indices 5607 - oa - matrix values 5608 5609 Output Parameter: 5610 . mat - the matrix 5611 5612 Level: advanced 5613 5614 Notes: 5615 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5616 must free the arrays once the matrix has been destroyed and not before. 5617 5618 The i and j indices are 0 based 5619 5620 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5621 5622 This sets local rows and cannot be used to set off-processor values. 5623 5624 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5625 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5626 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5627 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5628 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5629 communication if it is known that only local entries will be set. 5630 5631 .keywords: matrix, aij, compressed row, sparse, parallel 5632 5633 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5634 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5635 @*/ 5636 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5637 { 5638 PetscErrorCode ierr; 5639 Mat_MPIAIJ *maij; 5640 5641 PetscFunctionBegin; 5642 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5643 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5644 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5645 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5646 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5647 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5648 maij = (Mat_MPIAIJ*) (*mat)->data; 5649 5650 (*mat)->preallocated = PETSC_TRUE; 5651 5652 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5653 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5654 5655 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5656 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5657 5658 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5659 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5660 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5661 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5662 5663 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5664 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5665 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5666 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5667 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5668 PetscFunctionReturn(0); 5669 } 5670 5671 /* 5672 Special version for direct calls from Fortran 5673 */ 5674 #include <petsc/private/fortranimpl.h> 5675 5676 /* Change these macros so can be used in void function */ 5677 #undef CHKERRQ 5678 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5679 #undef SETERRQ2 5680 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5681 #undef SETERRQ3 5682 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5683 #undef SETERRQ 5684 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5685 5686 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5687 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5688 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5689 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5690 #else 5691 #endif 5692 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5693 { 5694 Mat mat = *mmat; 5695 PetscInt m = *mm, n = *mn; 5696 InsertMode addv = 
*maddv; 5697 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5698 PetscScalar value; 5699 PetscErrorCode ierr; 5700 5701 MatCheckPreallocated(mat,1); 5702 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5703 5704 #if defined(PETSC_USE_DEBUG) 5705 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5706 #endif 5707 { 5708 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5709 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5710 PetscBool roworiented = aij->roworiented; 5711 5712 /* Some Variables required in the macro */ 5713 Mat A = aij->A; 5714 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5715 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5716 MatScalar *aa = a->a; 5717 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5718 Mat B = aij->B; 5719 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5720 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5721 MatScalar *ba = b->a; 5722 5723 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5724 PetscInt nonew = a->nonew; 5725 MatScalar *ap1,*ap2; 5726 5727 PetscFunctionBegin; 5728 for (i=0; i<m; i++) { 5729 if (im[i] < 0) continue; 5730 #if defined(PETSC_USE_DEBUG) 5731 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5732 #endif 5733 if (im[i] >= rstart && im[i] < rend) { 5734 row = im[i] - rstart; 5735 lastcol1 = -1; 5736 rp1 = aj + ai[row]; 5737 ap1 = aa + ai[row]; 5738 rmax1 = aimax[row]; 5739 nrow1 = ailen[row]; 5740 low1 = 0; 5741 high1 = nrow1; 5742 lastcol2 = -1; 5743 rp2 = bj + bi[row]; 5744 ap2 = ba + bi[row]; 5745 rmax2 = bimax[row]; 5746 nrow2 = bilen[row]; 5747 low2 = 0; 5748 high2 = nrow2; 5749 5750 for (j=0; j<n; j++) { 5751 if (roworiented) value = v[i*n+j]; 5752 else value = v[i+j*m]; 5753 if (in[j] >= cstart && in[j] < cend) { 5754 col = in[j] - cstart; 5755 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5756 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5757 } else if (in[j] < 0) continue; 5758 #if defined(PETSC_USE_DEBUG) 5759 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5760 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5761 #endif 5762 else { 5763 if (mat->was_assembled) { 5764 if (!aij->colmap) { 5765 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5766 } 5767 #if defined(PETSC_USE_CTABLE) 5768 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5769 col--; 5770 #else 5771 col = aij->colmap[in[j]] - 1; 5772 #endif 5773 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5774 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5775 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5776 col = in[j]; 5777 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5778 B = aij->B; 5779 b = (Mat_SeqAIJ*)B->data; 5780 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5781 rp2 = bj + bi[row]; 5782 ap2 = ba + bi[row]; 5783 rmax2 = bimax[row]; 5784 nrow2 = bilen[row]; 5785 low2 = 0; 5786 high2 = nrow2; 5787 bm = aij->B->rmap->n; 5788 ba = b->a; 5789 } 5790 } else col = in[j]; 5791 
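/* the entry belongs to the off-diagonal block B: col is B's local (compressed) column index if the matrix was previously assembled, otherwise the global column index */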
MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5792 } 5793 } 5794 } else if (!aij->donotstash) { 5795 if (roworiented) { 5796 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5797 } else { 5798 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5799 } 5800 } 5801 } 5802 } 5803 PetscFunctionReturnVoid(); 5804 } 5805 5806