#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
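/*
   Usage sketch for the MATAIJ type documented in the man page above (illustrative only, not part
   of this file; the sizes, the nonzero counts, and the assembly loop are assumptions).  Calling
   both preallocation routines lets the same code run with one process (MATSEQAIJ) or with many
   (MATMPIAIJ), as the man page recommends:

     Mat         A;
     PetscInt    i,rstart,rend,N = 100;
     PetscScalar v = 2.0;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,3,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,3,NULL,2,NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/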
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
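/*
   Caller-level sketch for MatGetColumnNorms() as implemented above (illustrative only; the
   assembled matrix A is assumed to exist).  Every process receives the norms of all global
   columns, so the output array must have global length:

     PetscInt  N;
     PetscReal *cnorms;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&cnorms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,cnorms);CHKERRQ(ierr);
     ierr = PetscFree(cnorms);CHKERRQ(ierr);
*/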
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else                high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else                high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *lrows;
  PetscInt       r, len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
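/*
   Caller-level sketch for MatZeroRows() as implemented above (illustrative only; the matrix A,
   the vectors x and b, and the list of boundary rows are assumptions).  Any process may list any
   global rows; passing diag = 1.0 keeps a unit diagonal, and supplying x and b adjusts the
   right-hand side so the zeroed rows stay consistent with the retained solution values:

     PetscInt bcrows[] = {0, 5, 7};
     ierr = MatZeroRows(A,3,bcrows,1.0,x,b);CHKERRQ(ierr);
*/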
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
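/*
   Caller-level sketch for the multiply kernels above (illustrative only; A is assumed to be an
   assembled MATMPIAIJ matrix).  MatCreateVecs() returns vectors whose parallel layout matches the
   matrix, which is what the VecGetLocalSize() check in MatMult_MPIAIJ() requires; the off-diagonal
   contribution B*x_ghost is gathered into a->lvec by the Mvctx scatter shown above.

     Vec x,y;
     ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/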
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* this processor needs space as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
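/*
   Caller-level sketch for the binary path above (illustrative only; the file name and the matrix A
   are assumptions).  MatView() dispatches to MatView_MPIAIJ_Binary() for a parallel AIJ matrix and
   a binary viewer, and the file can later be read back with MatLoad():

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/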
#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr =
PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1583 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1584 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1585 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1586 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1587 1588 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1589 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1590 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1591 for (i=0; i<m; i++) { 1592 PetscInt row = rdest[i],rowner; 1593 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1594 for (j=ai[i]; j<ai[i+1]; j++) { 1595 PetscInt cowner,col = cdest[aj[j]]; 1596 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1597 if (rowner == cowner) dnnz[i]++; 1598 else onnz[i]++; 1599 } 1600 for (j=bi[i]; j<bi[i+1]; j++) { 1601 PetscInt cowner,col = gcdest[bj[j]]; 1602 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1603 if (rowner == cowner) dnnz[i]++; 1604 else onnz[i]++; 1605 } 1606 } 1607 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1608 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1609 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1610 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1611 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1612 1613 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1614 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1615 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1616 for (i=0; i<m; i++) { 1617 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1618 PetscInt j0,rowlen; 1619 rowlen = ai[i+1] - ai[i]; 1620 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1621 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1622 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1623 } 1624 rowlen = bi[i+1] - bi[i]; 1625 for (j0=j=0; j<rowlen; j0=j) { 1626 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1627 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1628 } 1629 } 1630 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1631 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1632 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1633 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1634 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1635 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1636 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1637 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1638 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1639 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1640 *B = Aperm; 1641 PetscFunctionReturn(0); 1642 } 1643 1644 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1645 { 1646 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1647 PetscErrorCode ierr; 1648 1649 PetscFunctionBegin; 1650 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1651 if (ghosts) *ghosts = aij->garray; 1652 PetscFunctionReturn(0); 1653 } 
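/*
   Illustrative sketch (not part of this file): how a caller might use the public
   MatGetGhosts() interface that dispatches to the routine above. The Mat A below is
   assumed to be an already assembled MATMPIAIJ matrix.

     PetscInt        nghosts;
     const PetscInt *ghosts;
     PetscErrorCode  ierr;

     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
     /- nghosts is the number of columns of the off-diagonal block B and
        ghosts[] holds their global column indices (aij->garray) -/
*/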
1654 1655 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1656 { 1657 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1658 Mat A = mat->A,B = mat->B; 1659 PetscErrorCode ierr; 1660 PetscReal isend[5],irecv[5]; 1661 1662 PetscFunctionBegin; 1663 info->block_size = 1.0; 1664 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1665 1666 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1667 isend[3] = info->memory; isend[4] = info->mallocs; 1668 1669 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1670 1671 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1672 isend[3] += info->memory; isend[4] += info->mallocs; 1673 if (flag == MAT_LOCAL) { 1674 info->nz_used = isend[0]; 1675 info->nz_allocated = isend[1]; 1676 info->nz_unneeded = isend[2]; 1677 info->memory = isend[3]; 1678 info->mallocs = isend[4]; 1679 } else if (flag == MAT_GLOBAL_MAX) { 1680 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1681 1682 info->nz_used = irecv[0]; 1683 info->nz_allocated = irecv[1]; 1684 info->nz_unneeded = irecv[2]; 1685 info->memory = irecv[3]; 1686 info->mallocs = irecv[4]; 1687 } else if (flag == MAT_GLOBAL_SUM) { 1688 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1689 1690 info->nz_used = irecv[0]; 1691 info->nz_allocated = irecv[1]; 1692 info->nz_unneeded = irecv[2]; 1693 info->memory = irecv[3]; 1694 info->mallocs = irecv[4]; 1695 } 1696 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1697 info->fill_ratio_needed = 0; 1698 info->factor_mallocs = 0; 1699 PetscFunctionReturn(0); 1700 } 1701 1702 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1703 { 1704 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1705 PetscErrorCode ierr; 1706 1707 PetscFunctionBegin; 1708 switch (op) { 1709 case MAT_NEW_NONZERO_LOCATIONS: 1710 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1711 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1712 case MAT_KEEP_NONZERO_PATTERN: 1713 case MAT_NEW_NONZERO_LOCATION_ERR: 1714 case MAT_USE_INODES: 1715 case MAT_IGNORE_ZERO_ENTRIES: 1716 MatCheckPreallocated(A,1); 1717 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1718 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1719 break; 1720 case MAT_ROW_ORIENTED: 1721 MatCheckPreallocated(A,1); 1722 a->roworiented = flg; 1723 1724 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1725 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1726 break; 1727 case MAT_NEW_DIAGONALS: 1728 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1729 break; 1730 case MAT_IGNORE_OFF_PROC_ENTRIES: 1731 a->donotstash = flg; 1732 break; 1733 case MAT_SPD: 1734 A->spd_set = PETSC_TRUE; 1735 A->spd = flg; 1736 if (flg) { 1737 A->symmetric = PETSC_TRUE; 1738 A->structurally_symmetric = PETSC_TRUE; 1739 A->symmetric_set = PETSC_TRUE; 1740 A->structurally_symmetric_set = PETSC_TRUE; 1741 } 1742 break; 1743 case MAT_SYMMETRIC: 1744 MatCheckPreallocated(A,1); 1745 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1746 break; 1747 case MAT_STRUCTURALLY_SYMMETRIC: 1748 MatCheckPreallocated(A,1); 1749 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1750 break; 1751 case MAT_HERMITIAN: 1752 MatCheckPreallocated(A,1); 1753 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1754 break; 1755 case MAT_SYMMETRY_ETERNAL: 1756 MatCheckPreallocated(A,1); 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 break; 1759 case MAT_SUBMAT_SINGLEIS: 
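    /* only the flag is recorded on the Mat here; the submatrix routines read A->submat_singleis when they run */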
1760 A->submat_singleis = flg; 1761 break; 1762 case MAT_STRUCTURE_ONLY: 1763 /* The option is handled directly by MatSetOption() */ 1764 break; 1765 default: 1766 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1767 } 1768 PetscFunctionReturn(0); 1769 } 1770 1771 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1772 { 1773 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1774 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1775 PetscErrorCode ierr; 1776 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1777 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1778 PetscInt *cmap,*idx_p; 1779 1780 PetscFunctionBegin; 1781 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1782 mat->getrowactive = PETSC_TRUE; 1783 1784 if (!mat->rowvalues && (idx || v)) { 1785 /* 1786 allocate enough space to hold information from the longest row. 1787 */ 1788 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1789 PetscInt max = 1,tmp; 1790 for (i=0; i<matin->rmap->n; i++) { 1791 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1792 if (max < tmp) max = tmp; 1793 } 1794 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1795 } 1796 1797 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1798 lrow = row - rstart; 1799 1800 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1801 if (!v) {pvA = 0; pvB = 0;} 1802 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1803 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1804 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1805 nztot = nzA + nzB; 1806 1807 cmap = mat->garray; 1808 if (v || idx) { 1809 if (nztot) { 1810 /* Sort by increasing column numbers, assuming A and B already sorted */ 1811 PetscInt imark = -1; 1812 if (v) { 1813 *v = v_p = mat->rowvalues; 1814 for (i=0; i<nzB; i++) { 1815 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1816 else break; 1817 } 1818 imark = i; 1819 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1820 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1821 } 1822 if (idx) { 1823 *idx = idx_p = mat->rowindices; 1824 if (imark > -1) { 1825 for (i=0; i<imark; i++) { 1826 idx_p[i] = cmap[cworkB[i]]; 1827 } 1828 } else { 1829 for (i=0; i<nzB; i++) { 1830 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1831 else break; 1832 } 1833 imark = i; 1834 } 1835 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1836 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1837 } 1838 } else { 1839 if (idx) *idx = 0; 1840 if (v) *v = 0; 1841 } 1842 } 1843 *nz = nztot; 1844 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1845 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1846 PetscFunctionReturn(0); 1847 } 1848 1849 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1850 { 1851 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1852 1853 PetscFunctionBegin; 1854 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1855 aij->getrowactive = PETSC_FALSE; 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1860 { 1861 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1862 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = 
(Mat_SeqAIJ*)aij->B->data; 1863 PetscErrorCode ierr; 1864 PetscInt i,j,cstart = mat->cmap->rstart; 1865 PetscReal sum = 0.0; 1866 MatScalar *v; 1867 1868 PetscFunctionBegin; 1869 if (aij->size == 1) { 1870 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1871 } else { 1872 if (type == NORM_FROBENIUS) { 1873 v = amat->a; 1874 for (i=0; i<amat->nz; i++) { 1875 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1876 } 1877 v = bmat->a; 1878 for (i=0; i<bmat->nz; i++) { 1879 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1880 } 1881 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1882 *norm = PetscSqrtReal(*norm); 1883 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1884 } else if (type == NORM_1) { /* max column norm */ 1885 PetscReal *tmp,*tmp2; 1886 PetscInt *jj,*garray = aij->garray; 1887 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1888 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1889 *norm = 0.0; 1890 v = amat->a; jj = amat->j; 1891 for (j=0; j<amat->nz; j++) { 1892 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1893 } 1894 v = bmat->a; jj = bmat->j; 1895 for (j=0; j<bmat->nz; j++) { 1896 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1897 } 1898 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1899 for (j=0; j<mat->cmap->N; j++) { 1900 if (tmp2[j] > *norm) *norm = tmp2[j]; 1901 } 1902 ierr = PetscFree(tmp);CHKERRQ(ierr); 1903 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1904 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1905 } else if (type == NORM_INFINITY) { /* max row norm */ 1906 PetscReal ntemp = 0.0; 1907 for (j=0; j<aij->A->rmap->n; j++) { 1908 v = amat->a + amat->i[j]; 1909 sum = 0.0; 1910 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1911 sum += PetscAbsScalar(*v); v++; 1912 } 1913 v = bmat->a + bmat->i[j]; 1914 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1915 sum += PetscAbsScalar(*v); v++; 1916 } 1917 if (sum > ntemp) ntemp = sum; 1918 } 1919 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1920 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1921 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1922 } 1923 PetscFunctionReturn(0); 1924 } 1925 1926 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1927 { 1928 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1929 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1930 PetscErrorCode ierr; 1931 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1932 PetscInt cstart = A->cmap->rstart,ncol; 1933 Mat B; 1934 MatScalar *array; 1935 1936 PetscFunctionBegin; 1937 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1938 1939 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1940 ai = Aloc->i; aj = Aloc->j; 1941 bi = Bloc->i; bj = Bloc->j; 1942 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1943 PetscInt *d_nnz,*g_nnz,*o_nnz; 1944 PetscSFNode *oloc; 1945 PETSC_UNUSED PetscSF sf; 1946 1947 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1948 /* compute d_nnz for preallocation */ 1949 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1950 for (i=0; i<ai[ma]; i++) { 1951 d_nnz[aj[i]]++; 1952 aj[i] += cstart; /* global col index to be used by 
MatSetValues() */ 1953 } 1954 /* compute local off-diagonal contributions */ 1955 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1956 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1957 /* map those to global */ 1958 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1959 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1960 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1961 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1962 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1963 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1964 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1965 1966 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1967 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1968 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1969 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1970 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1971 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1972 } else { 1973 B = *matout; 1974 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1975 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1976 } 1977 1978 /* copy over the A part */ 1979 array = Aloc->a; 1980 row = A->rmap->rstart; 1981 for (i=0; i<ma; i++) { 1982 ncol = ai[i+1]-ai[i]; 1983 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1984 row++; 1985 array += ncol; aj += ncol; 1986 } 1987 aj = Aloc->j; 1988 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1989 1990 /* copy over the B part */ 1991 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1992 array = Bloc->a; 1993 row = A->rmap->rstart; 1994 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1995 cols_tmp = cols; 1996 for (i=0; i<mb; i++) { 1997 ncol = bi[i+1]-bi[i]; 1998 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1999 row++; 2000 array += ncol; cols_tmp += ncol; 2001 } 2002 ierr = PetscFree(cols);CHKERRQ(ierr); 2003 2004 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2005 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2006 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2007 *matout = B; 2008 } else { 2009 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2010 } 2011 PetscFunctionReturn(0); 2012 } 2013 2014 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2015 { 2016 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2017 Mat a = aij->A,b = aij->B; 2018 PetscErrorCode ierr; 2019 PetscInt s1,s2,s3; 2020 2021 PetscFunctionBegin; 2022 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2023 if (rr) { 2024 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2025 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2026 /* Overlap communication with computation. 
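       The VecScatterBegin() below starts moving the right-scaling vector rr into
       aij->lvec; the left scaling of B and the scaling of the diagonal block A can
       proceed while that communication is in flight, and the matching VecScatterEnd()
       further down completes it before the off-diagonal block is right-scaled.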
*/ 2027 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2028 } 2029 if (ll) { 2030 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2031 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2032 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2033 } 2034 /* scale the diagonal block */ 2035 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2036 2037 if (rr) { 2038 /* Do a scatter end and then right scale the off-diagonal block */ 2039 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2040 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2041 } 2042 PetscFunctionReturn(0); 2043 } 2044 2045 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2046 { 2047 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2048 PetscErrorCode ierr; 2049 2050 PetscFunctionBegin; 2051 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2052 PetscFunctionReturn(0); 2053 } 2054 2055 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2056 { 2057 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2058 Mat a,b,c,d; 2059 PetscBool flg; 2060 PetscErrorCode ierr; 2061 2062 PetscFunctionBegin; 2063 a = matA->A; b = matA->B; 2064 c = matB->A; d = matB->B; 2065 2066 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2067 if (flg) { 2068 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2069 } 2070 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2071 PetscFunctionReturn(0); 2072 } 2073 2074 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2075 { 2076 PetscErrorCode ierr; 2077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2078 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2079 2080 PetscFunctionBegin; 2081 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2082 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2083 /* because of the column compression in the off-processor part of the matrix a->B, 2084 the number of columns in a->B and b->B may be different, hence we cannot call 2085 the MatCopy() directly on the two parts. If need be, we can provide a more 2086 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2087 then copying the submatrices */ 2088 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2089 } else { 2090 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2091 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2092 } 2093 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2094 PetscFunctionReturn(0); 2095 } 2096 2097 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2098 { 2099 PetscErrorCode ierr; 2100 2101 PetscFunctionBegin; 2102 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2103 PetscFunctionReturn(0); 2104 } 2105 2106 /* 2107 Computes the number of nonzeros per row needed for preallocation when X and Y 2108 have different nonzero structure. 
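   For example (illustrative): if a row of X has global columns {0,3,5} and the same
   row of Y has global columns {3,4}, the merged row has columns {0,3,4,5}, so the
   routine reports nnz = 4 for that row.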
2109 */ 2110 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2111 { 2112 PetscInt i,j,k,nzx,nzy; 2113 2114 PetscFunctionBegin; 2115 /* Set the number of nonzeros in the new matrix */ 2116 for (i=0; i<m; i++) { 2117 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2118 nzx = xi[i+1] - xi[i]; 2119 nzy = yi[i+1] - yi[i]; 2120 nnz[i] = 0; 2121 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2122 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2123 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2124 nnz[i]++; 2125 } 2126 for (; k<nzy; k++) nnz[i]++; 2127 } 2128 PetscFunctionReturn(0); 2129 } 2130 2131 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2132 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2133 { 2134 PetscErrorCode ierr; 2135 PetscInt m = Y->rmap->N; 2136 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2137 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2138 2139 PetscFunctionBegin; 2140 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2145 { 2146 PetscErrorCode ierr; 2147 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2148 PetscBLASInt bnz,one=1; 2149 Mat_SeqAIJ *x,*y; 2150 2151 PetscFunctionBegin; 2152 if (str == SAME_NONZERO_PATTERN) { 2153 PetscScalar alpha = a; 2154 x = (Mat_SeqAIJ*)xx->A->data; 2155 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2156 y = (Mat_SeqAIJ*)yy->A->data; 2157 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2158 x = (Mat_SeqAIJ*)xx->B->data; 2159 y = (Mat_SeqAIJ*)yy->B->data; 2160 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2161 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2162 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2163 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2164 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2165 } else { 2166 Mat B; 2167 PetscInt *nnz_d,*nnz_o; 2168 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2169 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2170 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2171 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2172 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2173 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2174 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2175 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2176 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2177 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2178 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2179 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2180 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2181 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2182 } 2183 PetscFunctionReturn(0); 2184 } 2185 2186 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2187 2188 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2189 { 2190 #if defined(PETSC_USE_COMPLEX) 2191 PetscErrorCode ierr; 2192 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
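  /* compute row minima of the diagonal (A) and off-diagonal (B) blocks separately,
     then keep, for each local row, the entry of smaller magnitude */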
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
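  /* fill the diagonal (A) and off-diagonal (B) blocks with random entries, then re-assemble the parallel matrix */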
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2409 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2410 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2411 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2416 { 2417 PetscFunctionBegin; 2418 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2419 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2420 PetscFunctionReturn(0); 2421 } 2422 2423 /*@ 2424 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2425 2426 Collective on Mat 2427 2428 Input Parameters: 2429 + A - the matrix 2430 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2431 2432 Level: advanced 2433 2434 @*/ 2435 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2436 { 2437 PetscErrorCode ierr; 2438 2439 PetscFunctionBegin; 2440 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2441 PetscFunctionReturn(0); 2442 } 2443 2444 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2445 { 2446 PetscErrorCode ierr; 2447 PetscBool sc = PETSC_FALSE,flg; 2448 2449 PetscFunctionBegin; 2450 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2451 ierr = PetscObjectOptionsBegin((PetscObject)A); 2452 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2453 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2454 if (flg) { 2455 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2456 } 2457 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2458 PetscFunctionReturn(0); 2459 } 2460 2461 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2462 { 2463 PetscErrorCode ierr; 2464 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2465 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2466 2467 PetscFunctionBegin; 2468 if (!Y->preallocated) { 2469 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2470 } else if (!aij->nz) { 2471 PetscInt nonew = aij->nonew; 2472 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2473 aij->nonew = nonew; 2474 } 2475 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2476 PetscFunctionReturn(0); 2477 } 2478 2479 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2480 { 2481 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2482 PetscErrorCode ierr; 2483 2484 PetscFunctionBegin; 2485 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2486 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2487 if (d) { 2488 PetscInt rstart; 2489 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2490 *d += rstart; 2491 2492 } 2493 PetscFunctionReturn(0); 2494 } 2495 2496 2497 /* -------------------------------------------------------------------*/ 2498 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2499 MatGetRow_MPIAIJ, 2500 MatRestoreRow_MPIAIJ, 2501 MatMult_MPIAIJ, 2502 /* 4*/ MatMultAdd_MPIAIJ, 2503 MatMultTranspose_MPIAIJ, 2504 MatMultTransposeAdd_MPIAIJ, 2505 0, 2506 0, 2507 0, 2508 /*10*/ 0, 2509 0, 2510 0, 2511 MatSOR_MPIAIJ, 2512 MatTranspose_MPIAIJ, 2513 /*15*/ MatGetInfo_MPIAIJ, 2514 MatEqual_MPIAIJ, 2515 
MatGetDiagonal_MPIAIJ, 2516 MatDiagonalScale_MPIAIJ, 2517 MatNorm_MPIAIJ, 2518 /*20*/ MatAssemblyBegin_MPIAIJ, 2519 MatAssemblyEnd_MPIAIJ, 2520 MatSetOption_MPIAIJ, 2521 MatZeroEntries_MPIAIJ, 2522 /*24*/ MatZeroRows_MPIAIJ, 2523 0, 2524 0, 2525 0, 2526 0, 2527 /*29*/ MatSetUp_MPIAIJ, 2528 0, 2529 0, 2530 MatGetDiagonalBlock_MPIAIJ, 2531 0, 2532 /*34*/ MatDuplicate_MPIAIJ, 2533 0, 2534 0, 2535 0, 2536 0, 2537 /*39*/ MatAXPY_MPIAIJ, 2538 MatCreateSubMatrices_MPIAIJ, 2539 MatIncreaseOverlap_MPIAIJ, 2540 MatGetValues_MPIAIJ, 2541 MatCopy_MPIAIJ, 2542 /*44*/ MatGetRowMax_MPIAIJ, 2543 MatScale_MPIAIJ, 2544 MatShift_MPIAIJ, 2545 MatDiagonalSet_MPIAIJ, 2546 MatZeroRowsColumns_MPIAIJ, 2547 /*49*/ MatSetRandom_MPIAIJ, 2548 0, 2549 0, 2550 0, 2551 0, 2552 /*54*/ MatFDColoringCreate_MPIXAIJ, 2553 0, 2554 MatSetUnfactored_MPIAIJ, 2555 MatPermute_MPIAIJ, 2556 0, 2557 /*59*/ MatCreateSubMatrix_MPIAIJ, 2558 MatDestroy_MPIAIJ, 2559 MatView_MPIAIJ, 2560 0, 2561 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2562 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2563 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2564 0, 2565 0, 2566 0, 2567 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2568 MatGetRowMinAbs_MPIAIJ, 2569 0, 2570 0, 2571 0, 2572 0, 2573 /*75*/ MatFDColoringApply_AIJ, 2574 MatSetFromOptions_MPIAIJ, 2575 0, 2576 0, 2577 MatFindZeroDiagonals_MPIAIJ, 2578 /*80*/ 0, 2579 0, 2580 0, 2581 /*83*/ MatLoad_MPIAIJ, 2582 MatIsSymmetric_MPIAIJ, 2583 0, 2584 0, 2585 0, 2586 0, 2587 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2588 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2589 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2590 MatPtAP_MPIAIJ_MPIAIJ, 2591 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2592 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2593 0, 2594 0, 2595 0, 2596 0, 2597 /*99*/ 0, 2598 0, 2599 0, 2600 MatConjugate_MPIAIJ, 2601 0, 2602 /*104*/MatSetValuesRow_MPIAIJ, 2603 MatRealPart_MPIAIJ, 2604 MatImaginaryPart_MPIAIJ, 2605 0, 2606 0, 2607 /*109*/0, 2608 0, 2609 MatGetRowMin_MPIAIJ, 2610 0, 2611 MatMissingDiagonal_MPIAIJ, 2612 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2613 0, 2614 MatGetGhosts_MPIAIJ, 2615 0, 2616 0, 2617 /*119*/0, 2618 0, 2619 0, 2620 0, 2621 MatGetMultiProcBlock_MPIAIJ, 2622 /*124*/MatFindNonzeroRows_MPIAIJ, 2623 MatGetColumnNorms_MPIAIJ, 2624 MatInvertBlockDiagonal_MPIAIJ, 2625 0, 2626 MatCreateSubMatricesMPI_MPIAIJ, 2627 /*129*/0, 2628 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2629 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2630 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2631 0, 2632 /*134*/0, 2633 0, 2634 MatRARt_MPIAIJ_MPIAIJ, 2635 0, 2636 0, 2637 /*139*/MatSetBlockSizes_MPIAIJ, 2638 0, 2639 0, 2640 MatFDColoringSetUp_MPIXAIJ, 2641 MatFindOffBlockDiagonalEntries_MPIAIJ, 2642 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2643 }; 2644 2645 /* ----------------------------------------------------------------------------------------*/ 2646 2647 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2648 { 2649 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2650 PetscErrorCode ierr; 2651 2652 PetscFunctionBegin; 2653 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2654 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2655 PetscFunctionReturn(0); 2656 } 2657 2658 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2659 { 2660 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2661 PetscErrorCode ierr; 2662 2663 PetscFunctionBegin; 2664 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2665 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2666 PetscFunctionReturn(0); 2667 } 2668 2669 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2670 { 2671 Mat_MPIAIJ *b; 2672 PetscErrorCode ierr; 2673 2674 PetscFunctionBegin; 2675 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2676 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2677 b = (Mat_MPIAIJ*)B->data; 2678 2679 #if defined(PETSC_USE_CTABLE) 2680 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2681 #else 2682 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2683 #endif 2684 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2685 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2686 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2687 2688 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2689 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2690 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2691 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2692 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2693 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2694 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2695 2696 if (!B->preallocated) { 2697 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2698 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2699 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2700 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2701 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2702 } 2703 2704 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2705 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2706 B->preallocated = PETSC_TRUE; 2707 B->was_assembled = PETSC_FALSE; 2708 B->assembled = PETSC_FALSE;; 2709 PetscFunctionReturn(0); 2710 } 2711 2712 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2713 { 2714 Mat_MPIAIJ *b; 2715 PetscErrorCode ierr; 2716 2717 PetscFunctionBegin; 2718 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2719 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2720 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2721 b = (Mat_MPIAIJ*)B->data; 2722 2723 #if defined(PETSC_USE_CTABLE) 2724 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2725 #else 2726 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2727 #endif 2728 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2729 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2730 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2731 2732 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2733 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2734 B->preallocated = PETSC_TRUE; 2735 B->was_assembled = PETSC_FALSE; 2736 B->assembled = PETSC_FALSE; 2737 PetscFunctionReturn(0); 2738 } 2739 2740 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2741 { 2742 Mat mat; 2743 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 *newmat = 0; 2748 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2749 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2750 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2751 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2752 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2753 a = (Mat_MPIAIJ*)mat->data; 2754 2755 mat->factortype = matin->factortype; 2756 mat->assembled = PETSC_TRUE; 2757 mat->insertmode = NOT_SET_VALUES; 2758 mat->preallocated = PETSC_TRUE; 2759 2760 a->size = oldmat->size; 2761 a->rank = oldmat->rank; 2762 a->donotstash 
= oldmat->donotstash; 2763 a->roworiented = oldmat->roworiented; 2764 a->rowindices = 0; 2765 a->rowvalues = 0; 2766 a->getrowactive = PETSC_FALSE; 2767 2768 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2769 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2770 2771 if (oldmat->colmap) { 2772 #if defined(PETSC_USE_CTABLE) 2773 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2774 #else 2775 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2776 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2777 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2778 #endif 2779 } else a->colmap = 0; 2780 if (oldmat->garray) { 2781 PetscInt len; 2782 len = oldmat->B->cmap->n; 2783 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2784 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2785 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2786 } else a->garray = 0; 2787 2788 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2789 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2790 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2791 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2792 2793 if (oldmat->Mvctx_mpi1) { 2794 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2795 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2796 } 2797 2798 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2799 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2800 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2801 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2802 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2803 *newmat = mat; 2804 PetscFunctionReturn(0); 2805 } 2806 2807 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2808 { 2809 PetscScalar *vals,*svals; 2810 MPI_Comm comm; 2811 PetscErrorCode ierr; 2812 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2813 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2814 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2815 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2816 PetscInt cend,cstart,n,*rowners; 2817 int fd; 2818 PetscInt bs = newMat->rmap->bs; 2819 2820 PetscFunctionBegin; 2821 /* force binary viewer to load .info file if it has not yet done so */ 2822 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2823 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2824 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2825 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2826 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2827 if (!rank) { 2828 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2829 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2830 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2831 } 2832 2833 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2834 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2835 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2836 if (bs < 0) bs = 1; 2837 2838 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2839 M = header[1]; N = header[2]; 2840 2841 /* If global sizes are set, check if they are consistent with that given in the file */ 2842 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2843 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2844 2845 /* determine ownership of all (block) rows */ 2846 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2847 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2848 else m = newMat->rmap->n; /* Set by user */ 2849 2850 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2851 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2852 2853 /* First process needs enough room for process with most rows */ 2854 if (!rank) { 2855 mmax = rowners[1]; 2856 for (i=2; i<=size; i++) { 2857 mmax = PetscMax(mmax, rowners[i]); 2858 } 2859 } else mmax = -1; /* unused, but compilers complain */ 2860 2861 rowners[0] = 0; 2862 for (i=2; i<=size; i++) { 2863 rowners[i] += rowners[i-1]; 2864 } 2865 rstart = rowners[rank]; 2866 rend = rowners[rank+1]; 2867 2868 /* distribute row lengths to all processors */ 2869 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2870 if (!rank) { 2871 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2872 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2873 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2874 for (j=0; j<m; j++) { 2875 procsnz[0] += ourlens[j]; 2876 } 2877 for (i=1; i<size; i++) { 2878 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2879 /* calculate the number of nonzeros on each processor */ 2880 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2881 procsnz[i] += rowlengths[j]; 2882 } 2883 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2884 } 2885 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2886 } else { 2887 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2888 } 2889 2890 if (!rank) { 2891 /* determine max buffer needed and allocate it */ 2892 maxnz = 0; 2893 for (i=0; i<size; i++) { 2894 maxnz = PetscMax(maxnz,procsnz[i]); 2895 } 2896 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2897 2898 /* read in my part of the matrix column indices */ 2899 nz = procsnz[0]; 2900 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2901 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2902 2903 /* read in every one elses and ship off */ 2904 for (i=1; i<size; i++) { 2905 nz = procsnz[i]; 2906 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2907 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2908 } 2909 ierr = PetscFree(cols);CHKERRQ(ierr); 2910 } else { 2911 /* determine buffer space needed for message */ 2912 nz = 0; 2913 for (i=0; i<m; i++) { 2914 nz += ourlens[i]; 2915 } 2916 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2917 2918 /* receive message of column indices*/ 2919 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2920 } 2921 2922 /* 
determine column ownership if matrix is not square */ 2923 if (N != M) { 2924 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2925 else n = newMat->cmap->n; 2926 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2927 cstart = cend - n; 2928 } else { 2929 cstart = rstart; 2930 cend = rend; 2931 n = cend - cstart; 2932 } 2933 2934 /* loop over local rows, determining number of off diagonal entries */ 2935 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2936 jj = 0; 2937 for (i=0; i<m; i++) { 2938 for (j=0; j<ourlens[i]; j++) { 2939 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2940 jj++; 2941 } 2942 } 2943 2944 for (i=0; i<m; i++) { 2945 ourlens[i] -= offlens[i]; 2946 } 2947 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2948 2949 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2950 2951 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2952 2953 for (i=0; i<m; i++) { 2954 ourlens[i] += offlens[i]; 2955 } 2956 2957 if (!rank) { 2958 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2959 2960 /* read in my part of the matrix numerical values */ 2961 nz = procsnz[0]; 2962 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2963 2964 /* insert into matrix */ 2965 jj = rstart; 2966 smycols = mycols; 2967 svals = vals; 2968 for (i=0; i<m; i++) { 2969 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2970 smycols += ourlens[i]; 2971 svals += ourlens[i]; 2972 jj++; 2973 } 2974 2975 /* read in other processors and ship out */ 2976 for (i=1; i<size; i++) { 2977 nz = procsnz[i]; 2978 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2979 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2980 } 2981 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2982 } else { 2983 /* receive numeric values */ 2984 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2985 2986 /* receive message of values*/ 2987 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2988 2989 /* insert into matrix */ 2990 jj = rstart; 2991 smycols = mycols; 2992 svals = vals; 2993 for (i=0; i<m; i++) { 2994 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2995 smycols += ourlens[i]; 2996 svals += ourlens[i]; 2997 jj++; 2998 } 2999 } 3000 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3001 ierr = PetscFree(vals);CHKERRQ(ierr); 3002 ierr = PetscFree(mycols);CHKERRQ(ierr); 3003 ierr = PetscFree(rowners);CHKERRQ(ierr); 3004 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3005 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3006 PetscFunctionReturn(0); 3007 } 3008 3009 /* Not scalable because of ISAllGather() unless getting all columns. 
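   ISAllGather() leaves every process with a copy of the entire column index set, so
   per-process memory and communication grow with the global size of iscol.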
*/ 3010 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3011 { 3012 PetscErrorCode ierr; 3013 IS iscol_local; 3014 PetscBool isstride; 3015 PetscMPIInt lisstride=0,gisstride; 3016 3017 PetscFunctionBegin; 3018 /* check if we are grabbing all columns*/ 3019 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3020 3021 if (isstride) { 3022 PetscInt start,len,mstart,mlen; 3023 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3024 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3025 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3026 if (mstart == start && mlen-mstart == len) lisstride = 1; 3027 } 3028 3029 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3030 if (gisstride) { 3031 PetscInt N; 3032 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3033 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3034 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3035 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3036 } else { 3037 PetscInt cbs; 3038 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3039 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3040 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3041 } 3042 3043 *isseq = iscol_local; 3044 PetscFunctionReturn(0); 3045 } 3046 3047 /* 3048 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3049 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3050 3051 Input Parameters: 3052 mat - matrix 3053 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3054 i.e., mat->rstart <= isrow[i] < mat->rend 3055 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3056 i.e., mat->cstart <= iscol[i] < mat->cend 3057 Output Parameter: 3058 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3059 iscol_o - sequential column index set for retrieving mat->B 3060 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3061 */ 3062 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3063 { 3064 PetscErrorCode ierr; 3065 Vec x,cmap; 3066 const PetscInt *is_idx; 3067 PetscScalar *xarray,*cmaparray; 3068 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3069 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3070 Mat B=a->B; 3071 Vec lvec=a->lvec,lcmap; 3072 PetscInt i,cstart,cend,Bn=B->cmap->N; 3073 MPI_Comm comm; 3074 VecScatter Mvctx=a->Mvctx; 3075 3076 PetscFunctionBegin; 3077 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3078 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3079 3080 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3081 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3082 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3083 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3084 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3085 3086 /* Get start indices */ 3087 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3088 isstart -= ncols; 3089 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3090 3091 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3092 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3093 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3094 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3095 for (i=0; i<ncols; i++) { 3096 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3097 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3098 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3099 } 3100 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3101 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3102 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3103 3104 /* Get iscol_d */ 3105 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3106 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3107 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3108 3109 /* Get isrow_d */ 3110 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3111 rstart = mat->rmap->rstart; 3112 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3113 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3114 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3115 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3116 3117 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3118 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3119 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3120 3121 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3122 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3123 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3124 3125 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3126 3127 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3128 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3129 3130 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3131 /* off-process column indices */ 3132 count = 0; 3133 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3134 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3135 3136 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3137 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3138 for (i=0; i<Bn; i++) { 3139 if (PetscRealPart(xarray[i]) > -1.0) { 3140 idx[count] = i; /* local column index in off-diagonal part B */ 3141 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3142 count++; 3143 } 3144 } 3145 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3146 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3147 3148 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3149 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3150 3151 ierr = PetscFree(idx);CHKERRQ(ierr); 3152 *garray = cmap1; 3153 3154 ierr = VecDestroy(&x);CHKERRQ(ierr); 3155 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3156 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3157 PetscFunctionReturn(0); 3158 } 3159 3160 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3161 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3162 { 3163 PetscErrorCode ierr; 3164 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3165 Mat M = NULL; 3166 MPI_Comm comm; 3167 IS iscol_d,isrow_d,iscol_o; 3168 Mat Asub = NULL,Bsub = NULL; 3169 PetscInt n; 3170 3171 PetscFunctionBegin; 3172 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3173 3174 if (call == MAT_REUSE_MATRIX) { 3175 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3176 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3177 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3178 3179 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3180 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3181 3182 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3183 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3184 3185 /* Update diagonal and off-diagonal portions of submat */ 3186 asub = (Mat_MPIAIJ*)(*submat)->data; 3187 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3188 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3189 if (n) { 3190 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3191 } 3192 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3193 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3194 3195 } else { /* call == MAT_INITIAL_MATRIX) */ 3196 const PetscInt *garray; 3197 PetscInt BsubN; 3198 3199 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3200 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3201 3202 /* Create local submatrices Asub and Bsub */ 3203 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3204 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3205 3206 /* Create submatrix M */ 3207 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3208 3209 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3210 asub = (Mat_MPIAIJ*)M->data; 3211 3212 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3213 n = asub->B->cmap->N; 3214 if (BsubN > n) { 3215 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3216 const PetscInt *idx; 3217 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3218 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3219 3220 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3221 j = 0; 3222 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3223 for (i=0; i<n; i++) { 3224 if (j >= BsubN) break; 3225 while (subgarray[i] > garray[j]) j++; 3226 3227 if (subgarray[i] == garray[j]) { 3228 idx_new[i] = idx[j++]; 3229 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3230 } 3231 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3232 3233 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3234 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3235 3236 } else if (BsubN < n) { 3237 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3238 } 3239 3240 ierr = PetscFree(garray);CHKERRQ(ierr); 3241 *submat = M; 3242 3243 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3244 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3245 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3246 3247 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3248 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3249 3250 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3251 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3252 } 3253 PetscFunctionReturn(0); 3254 } 3255 3256 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3257 { 3258 PetscErrorCode ierr; 3259 IS iscol_local=NULL,isrow_d; 3260 PetscInt csize; 3261 PetscInt n,i,j,start,end; 3262 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3263 MPI_Comm comm; 3264 3265 PetscFunctionBegin; 3266 /* If isrow has same processor distribution as mat, 3267 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3268 if (call == MAT_REUSE_MATRIX) { 3269 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3270 if (isrow_d) { 3271 sameRowDist = PETSC_TRUE; 3272 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3273 } else { 3274 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3275 if (iscol_local) { 3276 sameRowDist = PETSC_TRUE; 3277 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3278 } 3279 } 3280 } else { 3281 /* Check if isrow has same processor distribution as mat */ 3282 sameDist[0] 
= PETSC_FALSE; 3283 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3284 if (!n) { 3285 sameDist[0] = PETSC_TRUE; 3286 } else { 3287 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3288 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3289 if (i >= start && j < end) { 3290 sameDist[0] = PETSC_TRUE; 3291 } 3292 } 3293 3294 /* Check if iscol has same processor distribution as mat */ 3295 sameDist[1] = PETSC_FALSE; 3296 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3297 if (!n) { 3298 sameDist[1] = PETSC_TRUE; 3299 } else { 3300 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3301 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3302 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3303 } 3304 3305 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3306 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3307 sameRowDist = tsameDist[0]; 3308 } 3309 3310 if (sameRowDist) { 3311 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3312 /* isrow and iscol have same processor distribution as mat */ 3313 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3314 PetscFunctionReturn(0); 3315 } else { /* sameRowDist */ 3316 /* isrow has same processor distribution as mat */ 3317 if (call == MAT_INITIAL_MATRIX) { 3318 PetscBool sorted; 3319 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3320 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3321 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3322 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3323 3324 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3325 if (sorted) { 3326 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3327 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3328 PetscFunctionReturn(0); 3329 } 3330 } else { /* call == MAT_REUSE_MATRIX */ 3331 IS iscol_sub; 3332 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3333 if (iscol_sub) { 3334 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3335 PetscFunctionReturn(0); 3336 } 3337 } 3338 } 3339 } 3340 3341 /* General case: iscol -> iscol_local which has global size of iscol */ 3342 if (call == MAT_REUSE_MATRIX) { 3343 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3344 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3345 } else { 3346 if (!iscol_local) { 3347 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3348 } 3349 } 3350 3351 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3352 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3353 3354 if (call == MAT_INITIAL_MATRIX) { 3355 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3356 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3357 } 3358 PetscFunctionReturn(0); 3359 } 3360 3361 /*@C 3362 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3363 and "off-diagonal" part of the matrix in CSR format. 3364 3365 Collective on MPI_Comm 3366 3367 Input Parameters: 3368 + comm - MPI communicator 3369 . 
A - "diagonal" portion of matrix 3370 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3371 - garray - global index of B columns 3372 3373 Output Parameter: 3374 . mat - the matrix, with input A as its local diagonal matrix 3375 Level: advanced 3376 3377 Notes: 3378 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3379 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3380 3381 .seealso: MatCreateMPIAIJWithSplitArrays() 3382 @*/ 3383 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3384 { 3385 PetscErrorCode ierr; 3386 Mat_MPIAIJ *maij; 3387 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3388 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3389 PetscScalar *oa=b->a; 3390 Mat Bnew; 3391 PetscInt m,n,N; 3392 3393 PetscFunctionBegin; 3394 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3395 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3396 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3397 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3398 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3399 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3400 3401 /* Get global columns of mat */ 3402 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3403 3404 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3405 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3406 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3407 maij = (Mat_MPIAIJ*)(*mat)->data; 3408 3409 (*mat)->preallocated = PETSC_TRUE; 3410 3411 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3412 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3413 3414 /* Set A as diagonal portion of *mat */ 3415 maij->A = A; 3416 3417 nz = oi[m]; 3418 for (i=0; i<nz; i++) { 3419 col = oj[i]; 3420 oj[i] = garray[col]; 3421 } 3422 3423 /* Set Bnew as off-diagonal portion of *mat */ 3424 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3425 bnew = (Mat_SeqAIJ*)Bnew->data; 3426 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3427 maij->B = Bnew; 3428 3429 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3430 3431 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3432 b->free_a = PETSC_FALSE; 3433 b->free_ij = PETSC_FALSE; 3434 ierr = MatDestroy(&B);CHKERRQ(ierr); 3435 3436 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3437 bnew->free_a = PETSC_TRUE; 3438 bnew->free_ij = PETSC_TRUE; 3439 3440 /* condense columns of maij->B */ 3441 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3442 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3443 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3444 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3445 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3446 PetscFunctionReturn(0); 3447 } 3448 3449 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3450 
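/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() above; the names Aloc, Bloc, gcols and C
   are placeholders. Bloc must be numbered with local (compacted) column indices, and gcols[i] must
   give the global column of local column i of Bloc. MatCreateSubMatrix_MPIAIJ_SameRowColDist()
   below drives it this way with the pieces produced by ISGetSeqIS_SameColDist_Private().

      Mat             Aloc,Bloc,C;
      const PetscInt *gcols;    built by the caller, one entry per column of Bloc
      ...assemble Aloc (the m x n sequential diagonal block) and Bloc (the m x nB sequential
         off-diagonal block) with MatCreateSeqAIJ()/MatSetValues(), and fill gcols...
      ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,gcols,&C);CHKERRQ(ierr);
      ...use C; Aloc now belongs to C and Bloc has been destroyed by the call...
*/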
3451 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3452 { 3453 PetscErrorCode ierr; 3454 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3455 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3456 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3457 Mat M,Msub,B=a->B; 3458 MatScalar *aa; 3459 Mat_SeqAIJ *aij; 3460 PetscInt *garray = a->garray,*colsub,Ncols; 3461 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3462 IS iscol_sub,iscmap; 3463 const PetscInt *is_idx,*cmap; 3464 PetscBool allcolumns=PETSC_FALSE; 3465 MPI_Comm comm; 3466 3467 PetscFunctionBegin; 3468 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3469 3470 if (call == MAT_REUSE_MATRIX) { 3471 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3472 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3473 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3474 3475 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3476 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3477 3478 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3479 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3480 3481 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3482 3483 } else { /* call == MAT_INITIAL_MATRIX) */ 3484 PetscBool flg; 3485 3486 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3487 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3488 3489 /* (1) iscol -> nonscalable iscol_local */ 3490 /* Check for special case: each processor gets entire matrix columns */ 3491 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3492 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3493 if (allcolumns) { 3494 iscol_sub = iscol_local; 3495 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3496 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3497 3498 } else { 3499 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3500 PetscInt *idx,*cmap1,k; 3501 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3502 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3503 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3504 count = 0; 3505 k = 0; 3506 for (i=0; i<Ncols; i++) { 3507 j = is_idx[i]; 3508 if (j >= cstart && j < cend) { 3509 /* diagonal part of mat */ 3510 idx[count] = j; 3511 cmap1[count++] = i; /* column index in submat */ 3512 } else if (Bn) { 3513 /* off-diagonal part of mat */ 3514 if (j == garray[k]) { 3515 idx[count] = j; 3516 cmap1[count++] = i; /* column index in submat */ 3517 } else if (j > garray[k]) { 3518 while (j > garray[k] && k < Bn-1) k++; 3519 if (j == garray[k]) { 3520 idx[count] = j; 3521 cmap1[count++] = i; /* column index in submat */ 3522 } 3523 } 3524 } 3525 } 3526 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3527 3528 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3529 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3530 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3531 3532 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3533 } 3534 3535 /* (3) Create sequential Msub */ 3536 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3537 } 3538 3539 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3540 aij = (Mat_SeqAIJ*)(Msub)->data; 3541 ii = aij->i; 3542 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3543 3544 /* 3545 m - number of local rows 3546 Ncols - number of columns (same on all processors) 3547 rstart - first row in new global matrix generated 3548 */ 3549 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3550 3551 if (call == MAT_INITIAL_MATRIX) { 3552 /* (4) Create parallel newmat */ 3553 PetscMPIInt rank,size; 3554 PetscInt csize; 3555 3556 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3557 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3558 3559 /* 3560 Determine the number of non-zeros in the diagonal and off-diagonal 3561 portions of the matrix in order to do correct preallocation 3562 */ 3563 3564 /* first get start and end of "diagonal" columns */ 3565 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3566 if (csize == PETSC_DECIDE) { 3567 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3568 if (mglobal == Ncols) { /* square matrix */ 3569 nlocal = m; 3570 } else { 3571 nlocal = Ncols/size + ((Ncols % size) > rank); 3572 } 3573 } else { 3574 nlocal = csize; 3575 } 3576 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3577 rstart = rend - nlocal; 3578 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3579 3580 /* next, compute all the lengths */ 3581 jj = aij->j; 3582 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3583 olens = dlens + m; 3584 for (i=0; i<m; i++) { 3585 jend = ii[i+1] - ii[i]; 3586 olen = 0; 3587 dlen = 0; 3588 for (j=0; j<jend; j++) { 3589 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3590 else dlen++; 3591 jj++; 3592 } 3593 olens[i] = olen; 3594 dlens[i] = dlen; 3595 } 3596 3597 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3598 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3599 3600 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3601 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
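    /* set block sizes and type of the new parallel matrix, then preallocate it with the diagonal/off-diagonal row lengths dlens/olens counted above */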
3602 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3603 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3604 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3605 ierr = PetscFree(dlens);CHKERRQ(ierr); 3606 3607 } else { /* call == MAT_REUSE_MATRIX */ 3608 M = *newmat; 3609 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3610 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3611 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3612 /* 3613 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3614 rather than the slower MatSetValues(). 3615 */ 3616 M->was_assembled = PETSC_TRUE; 3617 M->assembled = PETSC_FALSE; 3618 } 3619 3620 /* (5) Set values of Msub to *newmat */ 3621 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3622 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3623 3624 jj = aij->j; 3625 aa = aij->a; 3626 for (i=0; i<m; i++) { 3627 row = rstart + i; 3628 nz = ii[i+1] - ii[i]; 3629 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3630 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3631 jj += nz; aa += nz; 3632 } 3633 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3634 3635 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3636 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3637 3638 ierr = PetscFree(colsub);CHKERRQ(ierr); 3639 3640 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3641 if (call == MAT_INITIAL_MATRIX) { 3642 *newmat = M; 3643 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3644 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3645 3646 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3647 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3648 3649 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3650 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3651 3652 if (iscol_local) { 3653 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3654 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3655 } 3656 } 3657 PetscFunctionReturn(0); 3658 } 3659 3660 /* 3661 Not great since it makes two copies of the submatrix, first an SeqAIJ 3662 in local and then by concatenating the local matrices the end result. 3663 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3664 3665 Note: This requires a sequential iscol with all indices. 
3666 */ 3667 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3668 { 3669 PetscErrorCode ierr; 3670 PetscMPIInt rank,size; 3671 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3672 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3673 Mat M,Mreuse; 3674 MatScalar *aa,*vwork; 3675 MPI_Comm comm; 3676 Mat_SeqAIJ *aij; 3677 PetscBool colflag,allcolumns=PETSC_FALSE; 3678 3679 PetscFunctionBegin; 3680 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3681 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3682 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3683 3684 /* Check for special case: each processor gets entire matrix columns */ 3685 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3686 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3687 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3688 3689 if (call == MAT_REUSE_MATRIX) { 3690 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3691 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3692 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3693 } else { 3694 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3695 } 3696 3697 /* 3698 m - number of local rows 3699 n - number of columns (same on all processors) 3700 rstart - first row in new global matrix generated 3701 */ 3702 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3703 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3704 if (call == MAT_INITIAL_MATRIX) { 3705 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3706 ii = aij->i; 3707 jj = aij->j; 3708 3709 /* 3710 Determine the number of non-zeros in the diagonal and off-diagonal 3711 portions of the matrix in order to do correct preallocation 3712 */ 3713 3714 /* first get start and end of "diagonal" columns */ 3715 if (csize == PETSC_DECIDE) { 3716 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3717 if (mglobal == n) { /* square matrix */ 3718 nlocal = m; 3719 } else { 3720 nlocal = n/size + ((n % size) > rank); 3721 } 3722 } else { 3723 nlocal = csize; 3724 } 3725 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3726 rstart = rend - nlocal; 3727 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3728 3729 /* next, compute all the lengths */ 3730 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3731 olens = dlens + m; 3732 for (i=0; i<m; i++) { 3733 jend = ii[i+1] - ii[i]; 3734 olen = 0; 3735 dlen = 0; 3736 for (j=0; j<jend; j++) { 3737 if (*jj < rstart || *jj >= rend) olen++; 3738 else dlen++; 3739 jj++; 3740 } 3741 olens[i] = olen; 3742 dlens[i] = dlen; 3743 } 3744 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3745 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3746 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3747 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3748 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3749 ierr = PetscFree(dlens);CHKERRQ(ierr); 3750 } else { 3751 PetscInt ml,nl; 3752 3753 M = *newmat; 3754 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3755 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3756 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3757 /* 3758 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3759 rather than the slower MatSetValues(). 3760 */ 3761 M->was_assembled = PETSC_TRUE; 3762 M->assembled = PETSC_FALSE; 3763 } 3764 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3765 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3766 ii = aij->i; 3767 jj = aij->j; 3768 aa = aij->a; 3769 for (i=0; i<m; i++) { 3770 row = rstart + i; 3771 nz = ii[i+1] - ii[i]; 3772 cwork = jj; jj += nz; 3773 vwork = aa; aa += nz; 3774 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3775 } 3776 3777 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3778 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3779 *newmat = M; 3780 3781 /* save submatrix used in processor for next request */ 3782 if (call == MAT_INITIAL_MATRIX) { 3783 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3784 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3785 } 3786 PetscFunctionReturn(0); 3787 } 3788 3789 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3790 { 3791 PetscInt m,cstart, cend,j,nnz,i,d; 3792 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3793 const PetscInt *JJ; 3794 PetscScalar *values; 3795 PetscErrorCode ierr; 3796 PetscBool nooffprocentries; 3797 3798 PetscFunctionBegin; 3799 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3800 3801 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3802 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3803 m = B->rmap->n; 3804 cstart = B->cmap->rstart; 3805 cend = B->cmap->rend; 3806 rstart = B->rmap->rstart; 3807 3808 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3809 3810 #if defined(PETSC_USE_DEBUG) 3811 for (i=0; i<m; i++) { 3812 nnz = Ii[i+1]- Ii[i]; 3813 JJ = J + Ii[i]; 3814 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3815 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3816 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3817 } 3818 #endif 3819 3820 for (i=0; i<m; i++) { 3821 nnz = Ii[i+1]- Ii[i]; 3822 JJ = J + Ii[i]; 3823 nnz_max = PetscMax(nnz_max,nnz); 3824 d = 0; 3825 for (j=0; j<nnz; j++) { 3826 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3827 } 3828 d_nnz[i] = d; 3829 o_nnz[i] = nnz - d; 3830 } 3831 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3832 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3833 3834 if (v) values = (PetscScalar*)v; 3835 else { 3836 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3837 } 3838 3839 for (i=0; i<m; i++) { 3840 ii = i + rstart; 3841 nnz = Ii[i+1]- Ii[i]; 3842 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3843 } 3844 nooffprocentries = B->nooffprocentries; 3845 B->nooffprocentries = PETSC_TRUE; 3846 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3847 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3848 B->nooffprocentries = nooffprocentries; 3849 3850 if (!v) { 3851 ierr = PetscFree(values);CHKERRQ(ierr); 3852 } 3853 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3854 PetscFunctionReturn(0); 3855 } 3856 3857 /*@ 3858 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3859 (the default parallel PETSc format). 3860 3861 Collective on MPI_Comm 3862 3863 Input Parameters: 3864 + B - the matrix 3865 . i - the indices into j for the start of each local row (starts with zero) 3866 . j - the column indices for each local row (starts with zero) 3867 - v - optional values in the matrix 3868 3869 Level: developer 3870 3871 Notes: 3872 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3873 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3874 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3875 3876 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3877 3878 The format which is used for the sparse matrix input, is equivalent to a 3879 row-major ordering.. i.e for the following matrix, the input data expected is 3880 as shown 3881 3882 $ 1 0 0 3883 $ 2 0 3 P0 3884 $ ------- 3885 $ 4 5 6 P1 3886 $ 3887 $ Process0 [P0]: rows_owned=[0,1] 3888 $ i = {0,1,3} [size = nrow+1 = 2+1] 3889 $ j = {0,0,2} [size = 3] 3890 $ v = {1,2,3} [size = 3] 3891 $ 3892 $ Process1 [P1]: rows_owned=[2] 3893 $ i = {0,3} [size = nrow+1 = 1+1] 3894 $ j = {0,1,2} [size = 3] 3895 $ v = {4,5,6} [size = 3] 3896 3897 .keywords: matrix, aij, compressed row, sparse, parallel 3898 3899 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3900 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3901 @*/ 3902 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3903 { 3904 PetscErrorCode ierr; 3905 3906 PetscFunctionBegin; 3907 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3908 PetscFunctionReturn(0); 3909 } 3910 3911 /*@C 3912 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3913 (the default parallel PETSc format). For good matrix assembly performance 3914 the user should preallocate the matrix storage by setting the parameters 3915 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3916 performance can be increased by more than a factor of 50. 3917 3918 Collective on MPI_Comm 3919 3920 Input Parameters: 3921 + B - the matrix 3922 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3923 (same value is used for all local rows) 3924 . d_nnz - array containing the number of nonzeros in the various rows of the 3925 DIAGONAL portion of the local submatrix (possibly different for each row) 3926 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3927 The size of this array is equal to the number of local rows, i.e 'm'. 
   For matrices that will be factored, you must leave room for (and set)
   the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
     The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
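     As an illustration (the array names below are placeholders), process 0 of the two-process
     layout shown further down could create its part of the matrix with
.vb
     PetscInt    ia[] = {0,1,3},  ja[] = {0,0,2};
     PetscScalar va[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,ia,ja,va,&A);
.ve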
4082 4083 The format which is used for the sparse matrix input, is equivalent to a 4084 row-major ordering.. i.e for the following matrix, the input data expected is 4085 as shown 4086 4087 $ 1 0 0 4088 $ 2 0 3 P0 4089 $ ------- 4090 $ 4 5 6 P1 4091 $ 4092 $ Process0 [P0]: rows_owned=[0,1] 4093 $ i = {0,1,3} [size = nrow+1 = 2+1] 4094 $ j = {0,0,2} [size = 3] 4095 $ v = {1,2,3} [size = 3] 4096 $ 4097 $ Process1 [P1]: rows_owned=[2] 4098 $ i = {0,3} [size = nrow+1 = 1+1] 4099 $ j = {0,1,2} [size = 3] 4100 $ v = {4,5,6} [size = 3] 4101 4102 .keywords: matrix, aij, compressed row, sparse, parallel 4103 4104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4105 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4106 @*/ 4107 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4108 { 4109 PetscErrorCode ierr; 4110 4111 PetscFunctionBegin; 4112 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4113 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4114 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4115 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4116 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4117 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4118 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4119 PetscFunctionReturn(0); 4120 } 4121 4122 /*@C 4123 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4124 (the default parallel PETSc format). For good matrix assembly performance 4125 the user should preallocate the matrix storage by setting the parameters 4126 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4127 performance can be increased by more than a factor of 50. 4128 4129 Collective on MPI_Comm 4130 4131 Input Parameters: 4132 + comm - MPI communicator 4133 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4134 This value should be the same as the local size used in creating the 4135 y vector for the matrix-vector product y = Ax. 4136 . n - This value should be the same as the local size used in creating the 4137 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4138 calculated if N is given) For square matrices n is almost always m. 4139 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4140 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4141 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4142 (same value is used for all local rows) 4143 . d_nnz - array containing the number of nonzeros in the various rows of the 4144 DIAGONAL portion of the local submatrix (possibly different for each row) 4145 or NULL, if d_nz is used to specify the nonzero structure. 4146 The size of this array is equal to the number of local rows, i.e 'm'. 4147 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4148 submatrix (same value is used for all local rows). 4149 - o_nnz - array containing the number of nonzeros in the various rows of the 4150 OFF-DIAGONAL portion of the local submatrix (possibly different for 4151 each row) or NULL, if o_nz is used to specify the nonzero 4152 structure. 
           The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc., where
   m0,m1,m2,.. are the input parameter 'm'. i.e., each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.
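   For instance, with the numbers above, proc0 could create and preallocate its share of the
   matrix as sketched below (the other processes make the matching collective call with their
   own local sizes and nonzero counts, then all processes insert entries with MatSetValues()
   and assemble):
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     ... MatSetValues() on each process ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve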
4291 4292 Level: intermediate 4293 4294 .keywords: matrix, aij, compressed row, sparse, parallel 4295 4296 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4297 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4298 @*/ 4299 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4300 { 4301 PetscErrorCode ierr; 4302 PetscMPIInt size; 4303 4304 PetscFunctionBegin; 4305 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4306 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4307 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4308 if (size > 1) { 4309 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4310 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4311 } else { 4312 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4313 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4314 } 4315 PetscFunctionReturn(0); 4316 } 4317 4318 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4319 { 4320 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4321 PetscBool flg; 4322 PetscErrorCode ierr; 4323 4324 PetscFunctionBegin; 4325 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4326 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4327 if (Ad) *Ad = a->A; 4328 if (Ao) *Ao = a->B; 4329 if (colmap) *colmap = a->garray; 4330 PetscFunctionReturn(0); 4331 } 4332 4333 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4334 { 4335 PetscErrorCode ierr; 4336 PetscInt m,N,i,rstart,nnz,Ii; 4337 PetscInt *indx; 4338 PetscScalar *values; 4339 4340 PetscFunctionBegin; 4341 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4342 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4343 PetscInt *dnz,*onz,sum,bs,cbs; 4344 4345 if (n == PETSC_DECIDE) { 4346 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4347 } 4348 /* Check sum(n) = N */ 4349 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4350 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4351 4352 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4353 rstart -= m; 4354 4355 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4356 for (i=0; i<m; i++) { 4357 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4358 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4359 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4360 } 4361 4362 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4363 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4364 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4365 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4366 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4367 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4368 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4369 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4370 } 4371 4372 /* numeric phase */ 4373 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4374 for (i=0; i<m; i++) { 4375 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4376 Ii = i + rstart; 4377 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4378 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4379 } 4380 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4381 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4382 PetscFunctionReturn(0); 4383 } 4384 4385 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4386 { 4387 PetscErrorCode ierr; 4388 PetscMPIInt rank; 4389 PetscInt m,N,i,rstart,nnz; 4390 size_t len; 4391 const PetscInt *indx; 4392 PetscViewer out; 4393 char *name; 4394 Mat B; 4395 const PetscScalar *values; 4396 4397 PetscFunctionBegin; 4398 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4399 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4400 /* Should this be the type of the diagonal block of A? */ 4401 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4402 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4403 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4404 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4405 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4406 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4407 for (i=0; i<m; i++) { 4408 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4409 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4410 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4411 } 4412 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4413 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4414 4415 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4416 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4417 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4418 sprintf(name,"%s.%d",outfile,rank); 4419 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4420 ierr = PetscFree(name);CHKERRQ(ierr); 4421 ierr = MatView(B,out);CHKERRQ(ierr); 4422 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4423 ierr = MatDestroy(&B);CHKERRQ(ierr); 4424 PetscFunctionReturn(0); 4425 } 4426 4427 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4428 { 4429 PetscErrorCode ierr; 4430 Mat_Merge_SeqsToMPI *merge; 4431 PetscContainer container; 4432 4433 PetscFunctionBegin; 4434 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4435 if (container) { 4436 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4437 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4438 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4439 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4440 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4441 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4442 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4443 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4444 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4445 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4446 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4447 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4448 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4449 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4450 ierr = PetscFree(merge);CHKERRQ(ierr); 4451 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4452 } 4453 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4454 PetscFunctionReturn(0); 4455 } 4456 4457 #include <../src/mat/utils/freespace.h> 4458 #include <petscbt.h> 4459 4460 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4461 { 4462 PetscErrorCode ierr; 4463 MPI_Comm comm; 4464 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4465 PetscMPIInt 
size,rank,taga,*len_s; 4466 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4467 PetscInt proc,m; 4468 PetscInt **buf_ri,**buf_rj; 4469 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4470 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4471 MPI_Request *s_waits,*r_waits; 4472 MPI_Status *status; 4473 MatScalar *aa=a->a; 4474 MatScalar **abuf_r,*ba_i; 4475 Mat_Merge_SeqsToMPI *merge; 4476 PetscContainer container; 4477 4478 PetscFunctionBegin; 4479 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4480 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4481 4482 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4483 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4484 4485 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4486 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4487 4488 bi = merge->bi; 4489 bj = merge->bj; 4490 buf_ri = merge->buf_ri; 4491 buf_rj = merge->buf_rj; 4492 4493 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4494 owners = merge->rowmap->range; 4495 len_s = merge->len_s; 4496 4497 /* send and recv matrix values */ 4498 /*-----------------------------*/ 4499 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4500 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4501 4502 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4503 for (proc=0,k=0; proc<size; proc++) { 4504 if (!len_s[proc]) continue; 4505 i = owners[proc]; 4506 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4507 k++; 4508 } 4509 4510 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4511 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4512 ierr = PetscFree(status);CHKERRQ(ierr); 4513 4514 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4515 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4516 4517 /* insert mat values of mpimat */ 4518 /*----------------------------*/ 4519 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4520 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4521 4522 for (k=0; k<merge->nrecv; k++) { 4523 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4524 nrows = *(buf_ri_k[k]); 4525 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4526 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4527 } 4528 4529 /* set values of ba */ 4530 m = merge->rowmap->n; 4531 for (i=0; i<m; i++) { 4532 arow = owners[rank] + i; 4533 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4534 bnzi = bi[i+1] - bi[i]; 4535 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4536 4537 /* add local non-zero vals of this proc's seqmat into ba */ 4538 anzi = ai[arow+1] - ai[arow]; 4539 aj = a->j + ai[arow]; 4540 aa = a->a + ai[arow]; 4541 nextaj = 0; 4542 for (j=0; nextaj<anzi; j++) { 4543 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4544 ba_i[j] += aa[nextaj++]; 4545 } 4546 } 4547 4548 /* add received vals into ba */ 4549 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4550 /* i-th row */ 4551 if (i == *nextrow[k]) { 4552 anzi = *(nextai[k]+1) - *nextai[k]; 4553 aj = buf_rj[k] + *(nextai[k]); 4554 aa = abuf_r[k] + *(nextai[k]); 4555 nextaj = 0; 4556 for (j=0; nextaj<anzi; j++) { 4557 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4558 ba_i[j] += aa[nextaj++]; 4559 } 4560 } 4561 nextrow[k]++; nextai[k]++; 4562 } 4563 } 4564 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4565 } 4566 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4567 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4568 4569 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4570 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4571 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4572 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4573 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4574 PetscFunctionReturn(0); 4575 } 4576 4577 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4578 { 4579 PetscErrorCode ierr; 4580 Mat B_mpi; 4581 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4582 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4583 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4584 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4585 PetscInt len,proc,*dnz,*onz,bs,cbs; 4586 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4587 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4588 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4589 MPI_Status *status; 4590 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4591 PetscBT lnkbt; 4592 Mat_Merge_SeqsToMPI *merge; 4593 PetscContainer container; 4594 4595 PetscFunctionBegin; 4596 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4597 4598 /* make sure it is a PETSc comm */ 4599 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4601 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4602 4603 ierr = PetscNew(&merge);CHKERRQ(ierr); 4604 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4605 4606 /* determine row ownership */ 4607 /*---------------------------------------------------------*/ 4608 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4609 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4610 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4611 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4612 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4613 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4614 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4615 4616 m = merge->rowmap->n; 4617 owners = merge->rowmap->range; 4618 4619 /* determine the number of messages to send, their lengths */ 4620 /*---------------------------------------------------------*/ 4621 len_s = merge->len_s; 4622 4623 len = 0; /* length of buf_si[] */ 4624 merge->nsend = 0; 4625 for (proc=0; proc<size; proc++) { 4626 len_si[proc] = 0; 4627 if (proc == rank) { 4628 len_s[proc] = 0; 4629 } else { 4630 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4631 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4632 } 4633 if (len_s[proc]) { 4634 merge->nsend++; 4635 nrows = 0; 4636 for (i=owners[proc]; i<owners[proc+1]; i++) { 4637 if (ai[i+1] > ai[i]) nrows++; 4638 } 4639 len_si[proc] = 2*(nrows+1); 4640 len += len_si[proc]; 4641 } 4642 } 4643 4644 /* determine the number and length of messages to receive for ij-structure */ 4645 /*-------------------------------------------------------------------------*/ 4646 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4647 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4648 4649 /* post the Irecv of j-structure */ 4650 /*-------------------------------*/ 4651 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4652 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4653 4654 /* post the Isend of j-structure */ 4655 /*--------------------------------*/ 4656 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4657 4658 for (proc=0, k=0; proc<size; proc++) { 4659 if (!len_s[proc]) continue; 4660 i = owners[proc]; 4661 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4662 k++; 4663 } 4664 4665 /* receives and sends of j-structure are complete */ 4666 /*------------------------------------------------*/ 4667 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4668 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4669 4670 /* send and recv i-structure */ 4671 /*---------------------------*/ 4672 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4673 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4674 4675 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4676 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4677 for (proc=0,k=0; proc<size; proc++) { 4678 if (!len_s[proc]) continue; 4679 /* form outgoing message for i-structure: 4680 buf_si[0]: nrows to be sent 4681 [1:nrows]: row index (global) 4682 [nrows+1:2*nrows+1]: i-structure index 4683 */ 4684 /*-------------------------------------------*/ 4685 nrows = len_si[proc]/2 - 1; 4686 buf_si_i = buf_si + nrows+1; 4687 buf_si[0] = nrows; 4688 buf_si_i[0] = 0; 4689 nrows = 0; 4690 for (i=owners[proc]; i<owners[proc+1]; i++) { 4691 anzi = ai[i+1] - ai[i]; 4692 if (anzi) { 4693 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4694 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4695 nrows++; 4696 } 4697 } 4698 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4699 k++; 4700 buf_si += len_si[proc]; 4701 } 4702 4703 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4704 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4705 4706 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4707 for (i=0; i<merge->nrecv; i++) { 4708 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4709 } 4710 4711 ierr = PetscFree(len_si);CHKERRQ(ierr); 4712 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4713 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4714 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4715 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4716 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4717 ierr = PetscFree(status);CHKERRQ(ierr); 4718 4719 /* compute a local seq matrix in each processor */ 4720 /*----------------------------------------------*/ 4721 /* allocate bi array and free space for accumulating nonzero column info */ 4722 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4723 bi[0] = 0; 4724 4725 /* create and initialize a linked list */ 4726 nlnk = N+1; 4727 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4728 4729 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4730 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4731 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4732 4733 current_space = free_space; 4734 4735 /* determine symbolic info for each local row */ 4736 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4737 4738 for (k=0; k<merge->nrecv; k++) { 4739 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4740 nrows = *buf_ri_k[k]; 4741 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4742 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4743 } 4744 4745 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4746 len = 0; 4747 for (i=0; i<m; i++) { 4748 bnzi = 0; 4749 /* add local non-zero cols of this proc's seqmat into lnk */ 4750 arow = owners[rank] + i; 4751 anzi = ai[arow+1] - ai[arow]; 4752 aj = a->j + ai[arow]; 4753 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4754 bnzi += nlnk; 4755 /* add received col data into lnk */ 4756 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4757 if (i == *nextrow[k]) { /* i-th row */ 4758 anzi = *(nextai[k]+1) - *nextai[k]; 4759 aj = buf_rj[k] + *nextai[k]; 4760 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4761 bnzi += nlnk; 4762 nextrow[k]++; nextai[k]++; 4763 } 4764 } 4765 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4766 4767 /* if free space is not available, make more free space */ 4768 if (current_space->local_remaining<bnzi) { 4769 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4770 nspacedouble++; 4771 } 4772 /* copy data into free space, then initialize lnk */ 4773 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4774 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4775 4776 current_space->array += bnzi; 4777 current_space->local_used += bnzi; 4778 current_space->local_remaining -= bnzi; 4779 4780 bi[i+1] = bi[i] + bnzi; 4781 } 4782 4783 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4784 4785 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4786 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4787 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4788 4789 /* create symbolic parallel matrix B_mpi */ 4790 /*---------------------------------------*/ 4791 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4792 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4793 if (n==PETSC_DECIDE) { 4794 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4795 } else { 4796 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4797 } 4798 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4799 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4800 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4801 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4802 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4803 4804 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4805 B_mpi->assembled = PETSC_FALSE; 4806 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4807 merge->bi = bi; 4808 merge->bj = bj; 4809 merge->buf_ri = buf_ri; 4810 merge->buf_rj = buf_rj; 4811 merge->coi = NULL; 4812 merge->coj = NULL; 4813 merge->owners_co = NULL; 4814 4815 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4816 4817 /* attach the 
supporting struct to B_mpi for reuse */
4818 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4819 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4820 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4821 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4822 *mpimat = B_mpi;
4823
4824 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4825 PetscFunctionReturn(0);
4826 }
4827
4828 /*@C
4829 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding the sequential
4830 matrices from each processor
4831
4832 Collective on MPI_Comm
4833
4834 Input Parameters:
4835 + comm - the communicator the parallel matrix will live on
4836 . seqmat - the input sequential matrix (one per process)
4837 . m - number of local rows (or PETSC_DECIDE)
4838 . n - number of local columns (or PETSC_DECIDE)
4839 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4840
4841 Output Parameter:
4842 . mpimat - the parallel matrix generated
4843
4844 Level: advanced
4845
4846 Notes:
4847 The dimensions of the sequential matrix on each processor MUST be the same.
4848 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4849 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4850 @*/
4851 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4852 {
4853 PetscErrorCode ierr;
4854 PetscMPIInt size;
4855
4856 PetscFunctionBegin;
4857 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4858 if (size == 1) {
4859 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4860 if (scall == MAT_INITIAL_MATRIX) {
4861 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4862 } else {
4863 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4864 }
4865 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4866 PetscFunctionReturn(0);
4867 }
4868 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4869 if (scall == MAT_INITIAL_MATRIX) {
4870 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4871 }
4872 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4873 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4874 PetscFunctionReturn(0);
4875 }
4876
4877 /*@
4878 MatMPIAIJGetLocalMat - Creates a MATSEQAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4879 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4880 with MatGetSize()
4881
4882 Not Collective
4883
4884 Input Parameters:
4885 + A - the matrix
4886 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4887
4888 Output Parameter:
4889 .
A_loc - the local sequential matrix generated 4890 4891 Level: developer 4892 4893 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4894 4895 @*/ 4896 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4897 { 4898 PetscErrorCode ierr; 4899 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4900 Mat_SeqAIJ *mat,*a,*b; 4901 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4902 MatScalar *aa,*ba,*cam; 4903 PetscScalar *ca; 4904 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4905 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4906 PetscBool match; 4907 MPI_Comm comm; 4908 PetscMPIInt size; 4909 4910 PetscFunctionBegin; 4911 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4912 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4913 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4914 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4915 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4916 4917 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4918 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4919 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4920 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4921 aa = a->a; ba = b->a; 4922 if (scall == MAT_INITIAL_MATRIX) { 4923 if (size == 1) { 4924 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4925 PetscFunctionReturn(0); 4926 } 4927 4928 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4929 ci[0] = 0; 4930 for (i=0; i<am; i++) { 4931 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4932 } 4933 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4934 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4935 k = 0; 4936 for (i=0; i<am; i++) { 4937 ncols_o = bi[i+1] - bi[i]; 4938 ncols_d = ai[i+1] - ai[i]; 4939 /* off-diagonal portion of A */ 4940 for (jo=0; jo<ncols_o; jo++) { 4941 col = cmap[*bj]; 4942 if (col >= cstart) break; 4943 cj[k] = col; bj++; 4944 ca[k++] = *ba++; 4945 } 4946 /* diagonal portion of A */ 4947 for (j=0; j<ncols_d; j++) { 4948 cj[k] = cstart + *aj++; 4949 ca[k++] = *aa++; 4950 } 4951 /* off-diagonal portion of A */ 4952 for (j=jo; j<ncols_o; j++) { 4953 cj[k] = cmap[*bj++]; 4954 ca[k++] = *ba++; 4955 } 4956 } 4957 /* put together the new matrix */ 4958 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4959 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4960 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4961 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4962 mat->free_a = PETSC_TRUE; 4963 mat->free_ij = PETSC_TRUE; 4964 mat->nonew = 0; 4965 } else if (scall == MAT_REUSE_MATRIX) { 4966 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4967 ci = mat->i; cj = mat->j; cam = mat->a; 4968 for (i=0; i<am; i++) { 4969 /* off-diagonal portion of A */ 4970 ncols_o = bi[i+1] - bi[i]; 4971 for (jo=0; jo<ncols_o; jo++) { 4972 col = cmap[*bj]; 4973 if (col >= cstart) break; 4974 *cam++ = *ba++; bj++; 4975 } 4976 /* diagonal portion of A */ 4977 ncols_d = ai[i+1] - ai[i]; 4978 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4979 /* off-diagonal portion of A */ 4980 for (j=jo; j<ncols_o; j++) { 4981 *cam++ = *ba++; bj++; 4982 } 4983 } 4984 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4985 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4986 PetscFunctionReturn(0); 4987 } 4988 4989 /*@C 4990 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4991 4992 Not Collective 4993 4994 Input Parameters: 4995 + A - the matrix 4996 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4997 - row, col - index sets of rows and columns to extract (or NULL) 4998 4999 Output Parameter: 5000 . A_loc - the local sequential matrix generated 5001 5002 Level: developer 5003 5004 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5005 5006 @*/ 5007 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5008 { 5009 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5010 PetscErrorCode ierr; 5011 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5012 IS isrowa,iscola; 5013 Mat *aloc; 5014 PetscBool match; 5015 5016 PetscFunctionBegin; 5017 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5018 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5019 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5020 if (!row) { 5021 start = A->rmap->rstart; end = A->rmap->rend; 5022 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5023 } else { 5024 isrowa = *row; 5025 } 5026 if (!col) { 5027 start = A->cmap->rstart; 5028 cmap = a->garray; 5029 nzA = a->A->cmap->n; 5030 nzB = a->B->cmap->n; 5031 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5032 ncols = 0; 5033 for (i=0; i<nzB; i++) { 5034 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5035 else break; 5036 } 5037 imark = i; 5038 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5039 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5040 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5041 } else { 5042 iscola = *col; 5043 } 5044 if (scall != MAT_INITIAL_MATRIX) { 5045 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5046 aloc[0] = *A_loc; 5047 } 5048 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5049 *A_loc = aloc[0]; 5050 ierr = PetscFree(aloc);CHKERRQ(ierr); 5051 if (!row) { 5052 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5053 } 5054 if (!col) { 5055 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5056 } 5057 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5058 PetscFunctionReturn(0); 5059 } 5060 5061 /*@C 5062 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5063 5064 Collective on Mat 5065 5066 Input Parameters: 5067 + A,B - the matrices in mpiaij format 5068 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5069 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5070 5071 Output Parameter: 5072 + rowb, colb - index sets of rows and columns of B to extract 5073 - B_seq - the sequential matrix generated 5074 5075 Level: developer 5076 5077 @*/ 5078 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5079 { 5080 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5081 PetscErrorCode ierr; 5082 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5083 IS isrowb,iscolb; 5084 Mat *bseq=NULL; 5085 5086 PetscFunctionBegin; 5087 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5088 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5089 } 5090 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5091 5092 if (scall == MAT_INITIAL_MATRIX) { 5093 start = A->cmap->rstart; 5094 cmap = a->garray; 5095 nzA = a->A->cmap->n; 5096 nzB = a->B->cmap->n; 5097 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5098 ncols = 0; 5099 for (i=0; i<nzB; i++) { /* row < local row index */ 5100 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5101 else break; 5102 } 5103 imark = i; 5104 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5105 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5106 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5107 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5108 } else { 5109 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5110 isrowb = *rowb; iscolb = *colb; 5111 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5112 bseq[0] = *B_seq; 5113 } 5114 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5115 *B_seq = bseq[0]; 5116 ierr = PetscFree(bseq);CHKERRQ(ierr); 5117 if (!rowb) { 5118 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5119 } else { 5120 *rowb = isrowb; 5121 } 5122 if (!colb) { 5123 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5124 } else { 5125 *colb = iscolb; 5126 } 5127 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5128 PetscFunctionReturn(0); 5129 } 5130 5131 /* 5132 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5133 of the OFF-DIAGONAL portion of local A 5134 5135 Collective on Mat 5136 5137 Input Parameters: 5138 + A,B - the matrices in mpiaij format 5139 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5140 5141 Output Parameter: 5142 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5143 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5144 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5145 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5146 5147 Level: developer 5148 5149 */ 5150 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5151 { 5152 VecScatter_MPI_General *gen_to,*gen_from; 5153 PetscErrorCode ierr; 5154 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5155 Mat_SeqAIJ *b_oth; 5156 VecScatter ctx; 5157 MPI_Comm comm; 5158 PetscMPIInt *rprocs,*sprocs,tag,rank; 5159 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5160 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5161 PetscScalar *b_otha,*bufa,*bufA,*vals; 5162 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5163 MPI_Request *rwaits = NULL,*swaits = NULL; 5164 MPI_Status *sstatus,rstatus; 5165 PetscMPIInt jj,size; 5166 VecScatterType type; 5167 PetscBool mpi1; 5168 5169 PetscFunctionBegin; 5170 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5171 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5172 5173 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5174 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5175 } 5176 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5178 5179 if (size == 1) { 5180 startsj_s = NULL; 5181 bufa_ptr = NULL; 5182 *B_oth = NULL; 5183 PetscFunctionReturn(0); 5184 } 5185 5186 ctx = a->Mvctx; 5187 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5188 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5189 if (!mpi1) { 5190 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5191 thus create a->Mvctx_mpi1 */ 5192 if (!a->Mvctx_mpi1) { 5193 a->Mvctx_mpi1_flg = PETSC_TRUE; 5194 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5195 } 5196 ctx = a->Mvctx_mpi1; 5197 } 5198 tag = ((PetscObject)ctx)->tag; 5199 5200 gen_to = (VecScatter_MPI_General*)ctx->todata; 5201 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5202 nrecvs = gen_from->n; 5203 nsends = gen_to->n; 5204 5205 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5206 srow = gen_to->indices; /* local row index to be sent */ 5207 sstarts = gen_to->starts; 5208 sprocs = gen_to->procs; 5209 sstatus = gen_to->sstatus; 5210 sbs = gen_to->bs; 5211 rstarts = gen_from->starts; 5212 rprocs = gen_from->procs; 5213 rbs = gen_from->bs; 5214 5215 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5216 if (scall == MAT_INITIAL_MATRIX) { 5217 /* i-array */ 5218 /*---------*/ 5219 /* post receives */ 5220 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5221 for (i=0; i<nrecvs; i++) { 5222 rowlen = rvalues + rstarts[i]*rbs; 5223 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5224 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5225 } 5226 5227 /* pack the outgoing message */ 5228 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5229 5230 sstartsj[0] = 0; 5231 rstartsj[0] = 0; 5232 len = 0; /* total length of j or a array to be sent */ 5233 k = 0; 5234 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5235 for (i=0; i<nsends; i++) { 5236 rowlen = svalues + sstarts[i]*sbs; 5237 
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5238 for (j=0; j<nrows; j++) { 5239 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5240 for (l=0; l<sbs; l++) { 5241 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5242 5243 rowlen[j*sbs+l] = ncols; 5244 5245 len += ncols; 5246 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5247 } 5248 k++; 5249 } 5250 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5251 5252 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5253 } 5254 /* recvs and sends of i-array are completed */ 5255 i = nrecvs; 5256 while (i--) { 5257 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5258 } 5259 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5260 ierr = PetscFree(svalues);CHKERRQ(ierr); 5261 5262 /* allocate buffers for sending j and a arrays */ 5263 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5264 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5265 5266 /* create i-array of B_oth */ 5267 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5268 5269 b_othi[0] = 0; 5270 len = 0; /* total length of j or a array to be received */ 5271 k = 0; 5272 for (i=0; i<nrecvs; i++) { 5273 rowlen = rvalues + rstarts[i]*rbs; 5274 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5275 for (j=0; j<nrows; j++) { 5276 b_othi[k+1] = b_othi[k] + rowlen[j]; 5277 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5278 k++; 5279 } 5280 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5281 } 5282 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5283 5284 /* allocate space for j and a arrrays of B_oth */ 5285 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5286 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5287 5288 /* j-array */ 5289 /*---------*/ 5290 /* post receives of j-array */ 5291 for (i=0; i<nrecvs; i++) { 5292 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5293 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5294 } 5295 5296 /* pack the outgoing message j-array */ 5297 k = 0; 5298 for (i=0; i<nsends; i++) { 5299 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5300 bufJ = bufj+sstartsj[i]; 5301 for (j=0; j<nrows; j++) { 5302 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5303 for (ll=0; ll<sbs; ll++) { 5304 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5305 for (l=0; l<ncols; l++) { 5306 *bufJ++ = cols[l]; 5307 } 5308 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5309 } 5310 } 5311 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5312 } 5313 5314 /* recvs and sends of j-array are completed */ 5315 i = nrecvs; 5316 while (i--) { 5317 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5318 } 5319 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5320 } else if (scall == MAT_REUSE_MATRIX) { 5321 sstartsj = *startsj_s; 5322 rstartsj = *startsj_r; 5323 bufa = *bufa_ptr; 5324 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5325 b_otha = b_oth->a; 5326 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5327 5328 /* a-array */ 5329 /*---------*/ 5330 /* post receives of a-array */ 5331 for (i=0; i<nrecvs; i++) { 5332 nrows = rstartsj[i+1]-rstartsj[i]; /* length 
of the msg received */ 5333 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5334 } 5335 5336 /* pack the outgoing message a-array */ 5337 k = 0; 5338 for (i=0; i<nsends; i++) { 5339 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5340 bufA = bufa+sstartsj[i]; 5341 for (j=0; j<nrows; j++) { 5342 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5343 for (ll=0; ll<sbs; ll++) { 5344 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5345 for (l=0; l<ncols; l++) { 5346 *bufA++ = vals[l]; 5347 } 5348 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5349 } 5350 } 5351 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5352 } 5353 /* recvs and sends of a-array are completed */ 5354 i = nrecvs; 5355 while (i--) { 5356 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5357 } 5358 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5359 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5360 5361 if (scall == MAT_INITIAL_MATRIX) { 5362 /* put together the new matrix */ 5363 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5364 5365 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5366 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5367 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5368 b_oth->free_a = PETSC_TRUE; 5369 b_oth->free_ij = PETSC_TRUE; 5370 b_oth->nonew = 0; 5371 5372 ierr = PetscFree(bufj);CHKERRQ(ierr); 5373 if (!startsj_s || !bufa_ptr) { 5374 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5375 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5376 } else { 5377 *startsj_s = sstartsj; 5378 *startsj_r = rstartsj; 5379 *bufa_ptr = bufa; 5380 } 5381 } 5382 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5383 PetscFunctionReturn(0); 5384 } 5385 5386 /*@C 5387 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5388 5389 Not Collective 5390 5391 Input Parameters: 5392 . A - The matrix in mpiaij format 5393 5394 Output Parameter: 5395 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5396 . 
colmap - A map from global column index to local index into lvec 5397 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5398 5399 Level: developer 5400 5401 @*/ 5402 #if defined(PETSC_USE_CTABLE) 5403 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5404 #else 5405 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5406 #endif 5407 { 5408 Mat_MPIAIJ *a; 5409 5410 PetscFunctionBegin; 5411 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5412 PetscValidPointer(lvec, 2); 5413 PetscValidPointer(colmap, 3); 5414 PetscValidPointer(multScatter, 4); 5415 a = (Mat_MPIAIJ*) A->data; 5416 if (lvec) *lvec = a->lvec; 5417 if (colmap) *colmap = a->colmap; 5418 if (multScatter) *multScatter = a->Mvctx; 5419 PetscFunctionReturn(0); 5420 } 5421 5422 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5423 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5424 #if defined(PETSC_HAVE_MKL_SPARSE) 5425 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5426 #endif 5427 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5428 #if defined(PETSC_HAVE_ELEMENTAL) 5429 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5430 #endif 5431 #if defined(PETSC_HAVE_HYPRE) 5432 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5433 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5434 #endif 5435 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5436 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5437 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5438 5439 /* 5440 Computes (B'*A')' since computing B*A directly is untenable 5441 5442 n p p 5443 ( ) ( ) ( ) 5444 m ( A ) * n ( B ) = m ( C ) 5445 ( ) ( ) ( ) 5446 5447 */ 5448 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5449 { 5450 PetscErrorCode ierr; 5451 Mat At,Bt,Ct; 5452 5453 PetscFunctionBegin; 5454 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5455 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5456 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5457 ierr = MatDestroy(&At);CHKERRQ(ierr); 5458 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5459 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5460 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5461 PetscFunctionReturn(0); 5462 } 5463 5464 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5465 { 5466 PetscErrorCode ierr; 5467 PetscInt m=A->rmap->n,n=B->cmap->n; 5468 Mat Cmat; 5469 5470 PetscFunctionBegin; 5471 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5472 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5473 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5474 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5475 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5476 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5477 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5478 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5479 5480 Cmat->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIAIJ; 5481 5482 *C = Cmat; 5483 PetscFunctionReturn(0); 5484 } 5485 5486 /* ----------------------------------------------------------------*/ 5487 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5488 { 5489 PetscErrorCode ierr; 5490 5491 PetscFunctionBegin; 5492 if (scall == MAT_INITIAL_MATRIX) { 5493 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5494 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5495 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5496 } 5497 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5498 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5499 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5500 PetscFunctionReturn(0); 5501 } 5502 5503 /*MC 5504 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5505 5506 Options Database Keys: 5507 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5508 5509 Level: beginner 5510 5511 .seealso: MatCreateAIJ() 5512 M*/ 5513 5514 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5515 { 5516 Mat_MPIAIJ *b; 5517 PetscErrorCode ierr; 5518 PetscMPIInt size; 5519 5520 PetscFunctionBegin; 5521 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5522 5523 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5524 B->data = (void*)b; 5525 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5526 B->assembled = PETSC_FALSE; 5527 B->insertmode = NOT_SET_VALUES; 5528 b->size = size; 5529 5530 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5531 5532 /* build cache for off array entries formed */ 5533 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5534 5535 b->donotstash = PETSC_FALSE; 5536 b->colmap = 0; 5537 b->garray = 0; 5538 b->roworiented = PETSC_TRUE; 5539 5540 /* stuff used for matrix vector multiply */ 5541 b->lvec = NULL; 5542 b->Mvctx = NULL; 5543 5544 /* stuff for MatGetRow() */ 5545 b->rowindices = 0; 5546 b->rowvalues = 0; 5547 b->getrowactive = PETSC_FALSE; 5548 5549 /* flexible pointer used in CUSP/CUSPARSE classes */ 5550 b->spptr = NULL; 5551 5552 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5553 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5554 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5555 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5556 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5557 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5558 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5559 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5560 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5561 #if defined(PETSC_HAVE_MKL_SPARSE) 5562 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5563 #endif 5564 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5565 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5566 #if defined(PETSC_HAVE_ELEMENTAL) 5567 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5568 #endif 5569 #if defined(PETSC_HAVE_HYPRE) 5570 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5571 #endif 5572 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5573 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5574 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5575 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5577 #if defined(PETSC_HAVE_HYPRE) 5578 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5579 #endif 5580 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5581 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5582 PetscFunctionReturn(0); 5583 } 5584 5585 /*@C 5586 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5587 and "off-diagonal" part of the matrix in CSR format. 5588 5589 Collective on MPI_Comm 5590 5591 Input Parameters: 5592 + comm - MPI communicator 5593 . m - number of local rows (Cannot be PETSC_DECIDE) 5594 . n - This value should be the same as the local size used in creating the 5595 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5596 calculated if N is given) For square matrices n is almost always m. 5597 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5598 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5599 . i - row indices for "diagonal" portion of matrix 5600 . j - column indices 5601 . a - matrix values 5602 . oi - row indices for "off-diagonal" portion of matrix 5603 . oj - column indices 5604 - oa - matrix values 5605 5606 Output Parameter: 5607 . mat - the matrix 5608 5609 Level: advanced 5610 5611 Notes: 5612 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5613 must free the arrays once the matrix has been destroyed and not before. 5614 5615 The i and j indices are 0 based 5616 5617 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5618 5619 This sets local rows and cannot be used to set off-processor values. 5620 5621 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5622 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5623 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5624 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5625 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5626 communication if it is known that only local entries will be set. 5627 5628 .keywords: matrix, aij, compressed row, sparse, parallel 5629 5630 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5631 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5632 @*/ 5633 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5634 { 5635 PetscErrorCode ierr; 5636 Mat_MPIAIJ *maij; 5637 5638 PetscFunctionBegin; 5639 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5640 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5641 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5642 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5643 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5644 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5645 maij = (Mat_MPIAIJ*) (*mat)->data; 5646 5647 (*mat)->preallocated = PETSC_TRUE; 5648 5649 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5650 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5651 5652 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5653 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5654 5655 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5656 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5657 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5658 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5659 5660 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5661 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5662 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5663 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5664 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5665 PetscFunctionReturn(0); 5666 } 5667 5668 /* 5669 Special version for direct calls from Fortran 5670 */ 5671 #include <petsc/private/fortranimpl.h> 5672 5673 /* Change these macros so can be used in void function */ 5674 #undef CHKERRQ 5675 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5676 #undef SETERRQ2 5677 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5678 #undef SETERRQ3 5679 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5680 #undef SETERRQ 5681 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5682 5683 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5684 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5685 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5686 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5687 #else 5688 #endif 5689 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5690 { 5691 Mat mat = *mmat; 5692 PetscInt m = *mm, n = *mn; 5693 InsertMode addv = 
*maddv; 5694 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5695 PetscScalar value; 5696 PetscErrorCode ierr; 5697 5698 MatCheckPreallocated(mat,1); 5699 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5700 5701 #if defined(PETSC_USE_DEBUG) 5702 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5703 #endif 5704 { 5705 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5706 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5707 PetscBool roworiented = aij->roworiented; 5708 5709 /* Some Variables required in the macro */ 5710 Mat A = aij->A; 5711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5712 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5713 MatScalar *aa = a->a; 5714 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5715 Mat B = aij->B; 5716 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5717 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5718 MatScalar *ba = b->a; 5719 5720 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5721 PetscInt nonew = a->nonew; 5722 MatScalar *ap1,*ap2; 5723 5724 PetscFunctionBegin; 5725 for (i=0; i<m; i++) { 5726 if (im[i] < 0) continue; 5727 #if defined(PETSC_USE_DEBUG) 5728 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5729 #endif 5730 if (im[i] >= rstart && im[i] < rend) { 5731 row = im[i] - rstart; 5732 lastcol1 = -1; 5733 rp1 = aj + ai[row]; 5734 ap1 = aa + ai[row]; 5735 rmax1 = aimax[row]; 5736 nrow1 = ailen[row]; 5737 low1 = 0; 5738 high1 = nrow1; 5739 lastcol2 = -1; 5740 rp2 = bj + bi[row]; 5741 ap2 = ba + bi[row]; 5742 rmax2 = bimax[row]; 5743 nrow2 = bilen[row]; 5744 low2 = 0; 5745 high2 = nrow2; 5746 5747 for (j=0; j<n; j++) { 5748 if (roworiented) value = v[i*n+j]; 5749 else value = v[i+j*m]; 5750 if (in[j] >= cstart && in[j] < cend) { 5751 col = in[j] - cstart; 5752 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5753 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5754 } else if (in[j] < 0) continue; 5755 #if defined(PETSC_USE_DEBUG) 5756 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5757 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5758 #endif 5759 else { 5760 if (mat->was_assembled) { 5761 if (!aij->colmap) { 5762 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5763 } 5764 #if defined(PETSC_USE_CTABLE) 5765 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5766 col--; 5767 #else 5768 col = aij->colmap[in[j]] - 1; 5769 #endif 5770 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5771 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5772 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5773 col = in[j]; 5774 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5775 B = aij->B; 5776 b = (Mat_SeqAIJ*)B->data; 5777 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5778 rp2 = bj + bi[row]; 5779 ap2 = ba + bi[row]; 5780 rmax2 = bimax[row]; 5781 nrow2 = bilen[row]; 5782 low2 = 0; 5783 high2 = nrow2; 5784 bm = aij->B->rmap->n; 5785 ba = b->a; 5786 } 5787 } else col = in[j]; 5788 
MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5789 } 5790 } 5791 } else if (!aij->donotstash) { 5792 if (roworiented) { 5793 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5794 } else { 5795 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5796 } 5797 } 5798 } 5799 } 5800 PetscFunctionReturnVoid(); 5801 } 5802 5803
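/*
   Usage sketch for MatCreateAIJ(): assembling a tridiagonal MATAIJ matrix. This is an illustrative
   example only; the function name ExampleTridiagonalAIJ, the global size N = 100, and the
   preallocation counts (3 diagonal, 2 off-diagonal nonzeros per row) are assumptions, not part of
   the PETSc API documented above.

   static PetscErrorCode ExampleTridiagonalAIJ(MPI_Comm comm)
   {
     PetscErrorCode ierr;
     Mat            A;
     PetscInt       i,Istart,Iend,ncols,col[3],N = 100;
     PetscScalar    v[3];

     PetscFunctionBegin;
     // preallocate 3 nonzeros per row in the diagonal block and at most 2 in the off-diagonal block
     ierr = MatCreateAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,3,NULL,2,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
     for (i=Istart; i<Iend; i++) {
       ncols = 0;
       if (i > 0)   {col[ncols] = i-1; v[ncols] = -1.0; ncols++;}
       col[ncols] = i; v[ncols] = 2.0; ncols++;
       if (i < N-1) {col[ncols] = i+1; v[ncols] = -1.0; ncols++;}
       ierr = MatSetValues(A,1,&i,ncols,col,v,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     PetscFunctionReturn(0);
   }
*/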
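/*
   Usage sketch for MatMPIAIJGetSeqAIJ(): inspecting the "diagonal" block Ad, the "off-diagonal"
   block Ao, and the column map garray of a MATMPIAIJ matrix. The function name
   ExampleInspectLocalBlocks is illustrative; A is assumed to be an assembled MATMPIAIJ matrix.

   static PetscErrorCode ExampleInspectLocalBlocks(Mat A)
   {
     PetscErrorCode ierr;
     Mat            Ad,Ao;
     const PetscInt *garray;
     PetscInt       mloc,nloc,noff;

     PetscFunctionBegin;
     // Ad holds the columns owned by this process; Ao holds the remaining (ghost) columns,
     // and garray[k] is the global column index of Ao's local column k
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
     ierr = MatGetSize(Ad,&mloc,&nloc);CHKERRQ(ierr);
     ierr = MatGetSize(Ao,NULL,&noff);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_SELF,"diagonal block %D x %D, off-diagonal block has %D columns\n",mloc,nloc,noff);CHKERRQ(ierr);
     if (noff > 0) {ierr = PetscPrintf(PETSC_COMM_SELF,"first ghost column is global column %D\n",garray[0]);CHKERRQ(ierr);}
     PetscFunctionReturn(0);
   }
*/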
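/*
   Usage sketch for MatCreateMPIAIJSumSeqAIJ(): every process builds a sequential matrix with the
   same global dimensions and the routine sums them into one parallel MATMPIAIJ matrix. The
   function name ExampleSumSeqAIJ, the size N = 10, and the rank-dependent values are illustrative
   assumptions; see the Notes of MatCreateMPIAIJSumSeqAIJ() above for the ownership of seqmat.

   static PetscErrorCode ExampleSumSeqAIJ(MPI_Comm comm)
   {
     PetscErrorCode ierr;
     PetscMPIInt    rank;
     Mat            seqmat,mpimat;
     PetscInt       i,N = 10;
     PetscScalar    v;

     PetscFunctionBegin;
     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
     v    = (PetscScalar)(rank + 1);
     // each process contributes a diagonal N x N sequential matrix; the dimensions must match on all processes
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,N,N,1,NULL,&seqmat);CHKERRQ(ierr);
     for (i=0; i<N; i++) {
       ierr = MatSetValues(seqmat,1,&i,1,&i,&v,ADD_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     // each entry of mpimat is the sum of the corresponding entries of the per-process seqmat's
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     // seqmat is intentionally not destroyed here; see the Notes of MatCreateMPIAIJSumSeqAIJ() above
     ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
     PetscFunctionReturn(0);
   }
*/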
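/*
   Usage sketch for MatMPIAIJGetLocalMat(): extracting the local rows of a MATMPIAIJ matrix A as a
   sequential matrix, then refreshing it after the numerical values of A have changed. The function
   name ExampleGetLocalRows is illustrative; A is assumed to be an assembled MATMPIAIJ matrix.

   static PetscErrorCode ExampleGetLocalRows(Mat A)
   {
     PetscErrorCode ierr;
     Mat            A_loc;
     PetscInt       mlocal,n;

     PetscFunctionBegin;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatGetSize(A_loc,&mlocal,&n);CHKERRQ(ierr);   // mlocal = local rows of A, n = global columns of A
     // after the values (not the nonzero pattern) of A change, the same A_loc can be updated in place
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
     PetscFunctionReturn(0);
   }
*/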
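/*
   Usage sketch for MatMPIAIJGetLocalMatCondensed(): passing NULL index sets selects all local rows
   and all nonzero columns of the local part of A. The function name ExampleGetLocalCondensed is
   illustrative; A is assumed to be an assembled MATMPIAIJ matrix.

   static PetscErrorCode ExampleGetLocalCondensed(Mat A)
   {
     PetscErrorCode ierr;
     Mat            A_loc;

     PetscFunctionBegin;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
     PetscFunctionReturn(0);
   }
*/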
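/*
   Usage sketch for MatCreateMPIAIJWithSplitArrays(): each process owns one row and one column and
   supplies its "diagonal" and "off-diagonal" CSR arrays directly. The function name
   ExampleSplitArrays and the numerical values are illustrative assumptions; the diagonal block uses
   local column indices while, judging from the implementation above (the off-diagonal block is
   created with the full global column width), the off-diagonal block uses global column indices.

   static PetscErrorCode ExampleSplitArrays(MPI_Comm comm)
   {
     PetscErrorCode ierr;
     PetscMPIInt    rank,size;
     Mat            mat;
     PetscInt       i[2]  = {0,1},  j[1]  = {0};   // diagonal block: one entry in the local row, local column 0
     PetscInt       oi[2] = {0,0},  oj[1] = {0};   // off-diagonal block: empty unless size > 1
     PetscScalar    a[1]  = {2.0},  oa[1] = {-1.0};

     PetscFunctionBegin;
     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
     if (size > 1) {
       oi[1] = 1;                  // one off-diagonal entry in the single local row
       oj[0] = (rank + 1) % size;  // couple to the column owned by the next process
     }
     ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
     // the arrays are not copied, so they must stay valid until mat is destroyed (they do here)
     ierr = MatDestroy(&mat);CHKERRQ(ierr);
     PetscFunctionReturn(0);
   }
*/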