1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 52 PetscFunctionBegin; 53 if (mat->A) { 54 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 55 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 61 { 62 PetscErrorCode ierr; 63 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 64 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 65 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 66 const PetscInt *ia,*ib; 67 const MatScalar *aa,*bb; 68 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 69 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 70 71 PetscFunctionBegin; 72 *keptrows = 0; 73 ia = a->i; 74 ib = b->i; 75 for (i=0; i<m; i++) { 76 na = ia[i+1] - ia[i]; 77 nb = ib[i+1] - ib[i]; 78 if (!na && !nb) { 79 cnt++; 80 goto ok1; 81 } 82 aa = a->a + ia[i]; 83 for (j=0; j<na; j++) { 84 if (aa[j] != 0.0) goto ok1; 85 } 86 bb = b->a + ib[i]; 87 for (j=0; j <nb; j++) { 88 if (bb[j] != 0.0) goto ok1; 89 } 90 cnt++; 91 ok1:; 92 } 93 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 94 if (!n0rows) PetscFunctionReturn(0); 95 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 96 cnt = 0; 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) continue; 101 aa = a->a + ia[i]; 102 for (j=0; j<na;j++) { 103 if (aa[j] != 0.0) { 104 rows[cnt++] = rstart + i; 105 goto ok2; 106 } 107 } 108 bb = b->a + ib[i]; 109 for (j=0; j<nb; j++) { 110 if (bb[j] != 0.0) { 111 rows[cnt++] = rstart + i; 112 goto ok2; 113 } 
114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 
385 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 386 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 387 } 388 i--; 389 if (mat->rmap->n) { 390 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 391 } 392 if (rank) { 393 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 394 } 395 } 396 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 Local utility routine that creates a mapping from the global column 403 number to the local number in the off-diagonal part of the local 404 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 405 a slightly higher hash table cost; without it it is not scalable (each processor 406 has an order N integer array but is fast to acess. 407 */ 408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt n = aij->B->cmap->n,i; 413 414 PetscFunctionBegin; 415 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 416 #if defined(PETSC_USE_CTABLE) 417 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 418 for (i=0; i<n; i++) { 419 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 #else 422 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 423 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 424 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 430 { \ 431 if (col <= lastcol1) low1 = 0; \ 432 else high1 = nrow1; \ 433 lastcol1 = col;\ 434 while (high1-low1 > 5) { \ 435 t = (low1+high1)/2; \ 436 if (rp1[t] > col) high1 = t; \ 437 else low1 = t; \ 438 } \ 439 for (_i=low1; _i<high1; _i++) { \ 440 if (rp1[_i] > col) break; \ 441 if (rp1[_i] == col) { \ 442 if (addv == ADD_VALUES) ap1[_i] += value; \ 443 else ap1[_i] = value; \ 444 goto a_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 448 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 449 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 450 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 451 N = nrow1++ - 1; a->nz++; high1++; \ 452 /* shift up all the later entries in this row */ \ 453 for (ii=N; ii>=_i; ii--) { \ 454 rp1[ii+1] = rp1[ii]; \ 455 ap1[ii+1] = ap1[ii]; \ 456 } \ 457 rp1[_i] = col; \ 458 ap1[_i] = value; \ 459 A->nonzerostate++;\ 460 a_noinsert: ; \ 461 ailen[row] = nrow1; \ 462 } 463 464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 465 { \ 466 if (col <= lastcol2) low2 = 0; \ 467 else high2 = nrow2; \ 468 lastcol2 = col; \ 469 while (high2-low2 > 5) { \ 470 t = (low2+high2)/2; \ 471 if (rp2[t] > col) high2 = t; \ 472 else low2 = t; \ 473 } \ 474 for (_i=low2; _i<high2; _i++) { \ 475 if (rp2[_i] > col) break; \ 476 if (rp2[_i] == col) { \ 477 if (addv == ADD_VALUES) ap2[_i] += value; \ 478 else ap2[_i] 
= value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 
575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 This function sets the j-arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 637 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 
638 */ 639 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscInt *dnz, const PetscInt *onz) 640 { 641 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 642 Mat A = aij->A; /* diagonal part of the matrix */ 643 Mat B = aij->B; /* offdiagonal part of the matrix */ 644 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 645 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 646 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 647 PetscInt *ailen = a->ilen,*aj = a->j; 648 PetscInt *bilen = b->ilen,*bj = b->j; 649 PetscInt am = aij->A->rmap->n,i,j; 650 PetscInt col, diag_so_far=0, offd_so_far=0; 651 652 PetscFunctionBegin; 653 /* Iterate over all rows of the matrix */ 654 for (j=0; j<am; j++) { 655 for (i=0; i<dnz[j]+onz[j]; i++) { 656 col = i + mat_i[j]; 657 /* If column is in the diagonal */ 658 if (mat_j[col] >= cstart && mat_j[col] < cend) { 659 aj[diag_so_far++] = mat_j[col] - cstart; 660 } else { /* off-diagonal entries */ 661 bj[offd_so_far++] = mat_j[col]; 662 } 663 } 664 ailen[j] = dnz[j]; 665 bilen[j] = onz[j]; 666 } 667 PetscFunctionReturn(0); 668 } 669 670 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 671 { 672 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 673 PetscErrorCode ierr; 674 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 675 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 676 677 PetscFunctionBegin; 678 for (i=0; i<m; i++) { 679 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 680 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 681 if (idxm[i] >= rstart && idxm[i] < rend) { 682 row = idxm[i] - rstart; 683 for (j=0; j<n; j++) { 684 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 685 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 686 if (idxn[j] >= cstart && idxn[j] < cend) { 687 col = idxn[j] - cstart; 688 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 689 } else { 690 if (!aij->colmap) { 691 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 692 } 693 #if defined(PETSC_USE_CTABLE) 694 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 695 col--; 696 #else 697 col = aij->colmap[idxn[j]] - 1; 698 #endif 699 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 700 else { 701 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 702 } 703 } 704 } 705 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 706 } 707 PetscFunctionReturn(0); 708 } 709 710 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 711 712 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 713 { 714 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 715 PetscErrorCode ierr; 716 PetscInt nstash,reallocs; 717 718 PetscFunctionBegin; 719 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 720 721 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 722 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 723 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 727 
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 728 { 729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 730 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 731 PetscErrorCode ierr; 732 PetscMPIInt n; 733 PetscInt i,j,rstart,ncols,flg; 734 PetscInt *row,*col; 735 PetscBool other_disassembled; 736 PetscScalar *val; 737 738 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 739 740 PetscFunctionBegin; 741 if (!aij->donotstash && !mat->nooffprocentries) { 742 while (1) { 743 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 744 if (!flg) break; 745 746 for (i=0; i<n; ) { 747 /* Now identify the consecutive vals belonging to the same row */ 748 for (j=i,rstart=row[j]; j<n; j++) { 749 if (row[j] != rstart) break; 750 } 751 if (j < n) ncols = j-i; 752 else ncols = n-i; 753 /* Now assemble all these values with a single function call */ 754 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 755 756 i = j; 757 } 758 } 759 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 760 } 761 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 762 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 763 764 /* determine if any processor has disassembled, if so we must 765 also disassemble ourselfs, in order that we may reassemble. */ 766 /* 767 if nonzero structure of submatrix B cannot change then we know that 768 no processor disassembled thus we can skip this stuff 769 */ 770 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 771 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 772 if (mat->was_assembled && !other_disassembled) { 773 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 774 } 775 } 776 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 777 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 778 } 779 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 780 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 781 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 782 783 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 784 785 aij->rowvalues = 0; 786 787 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 788 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 789 790 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 791 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 792 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 793 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 794 } 795 PetscFunctionReturn(0); 796 } 797 798 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 799 { 800 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 801 PetscErrorCode ierr; 802 803 PetscFunctionBegin; 804 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 805 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 806 PetscFunctionReturn(0); 807 } 808 809 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 810 { 811 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 812 PetscInt *lrows; 813 PetscInt r, len; 814 PetscBool cong; 815 PetscErrorCode ierr; 816 817 PetscFunctionBegin; 818 /* get locally owned rows */ 819 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 820 /* fix right hand side if needed */ 821 if (x && b) { 822 const PetscScalar 
*xx; 823 PetscScalar *bb; 824 825 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 826 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 827 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 828 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 829 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 830 } 831 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 832 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 833 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 834 if ((diag != 0.0) && cong) { 835 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 836 } else if (diag != 0.0) { 837 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 838 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 839 for (r = 0; r < len; ++r) { 840 const PetscInt row = lrows[r] + A->rmap->rstart; 841 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 842 } 843 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 844 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 845 } else { 846 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 847 } 848 ierr = PetscFree(lrows);CHKERRQ(ierr); 849 850 /* only change matrix nonzero state if pattern was allowed to be changed */ 851 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 852 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 853 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 854 } 855 PetscFunctionReturn(0); 856 } 857 858 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 859 { 860 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 861 PetscErrorCode ierr; 862 PetscMPIInt n = A->rmap->n; 863 PetscInt i,j,r,m,p = 0,len = 0; 864 PetscInt *lrows,*owners = A->rmap->range; 865 PetscSFNode *rrows; 866 PetscSF sf; 867 const PetscScalar *xx; 868 PetscScalar *bb,*mask; 869 Vec xmask,lmask; 870 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 871 const PetscInt *aj, *ii,*ridx; 872 PetscScalar *aa; 873 874 PetscFunctionBegin; 875 /* Create SF where leaves are input rows and roots are owned rows */ 876 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 877 for (r = 0; r < n; ++r) lrows[r] = -1; 878 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 879 for (r = 0; r < N; ++r) { 880 const PetscInt idx = rows[r]; 881 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 882 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 883 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 884 } 885 rrows[r].rank = p; 886 rrows[r].index = rows[r] - owners[p]; 887 } 888 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 889 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 890 /* Collect flags for rows to be zeroed */ 891 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 892 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 893 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 894 /* Compress and put in row numbers */ 895 for (r = 0; r < n; ++r) 
if (lrows[r] >= 0) lrows[len++] = r; 896 /* zero diagonal part of matrix */ 897 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 898 /* handle off diagonal part of matrix */ 899 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 900 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 901 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 902 for (i=0; i<len; i++) bb[lrows[i]] = 1; 903 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 904 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 907 if (x) { 908 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 910 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 911 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 912 } 913 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 914 /* remove zeroed rows of off diagonal matrix */ 915 ii = aij->i; 916 for (i=0; i<len; i++) { 917 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 918 } 919 /* loop over all elements of off process part of matrix zeroing removed columns*/ 920 if (aij->compressedrow.use) { 921 m = aij->compressedrow.nrows; 922 ii = aij->compressedrow.i; 923 ridx = aij->compressedrow.rindex; 924 for (i=0; i<m; i++) { 925 n = ii[i+1] - ii[i]; 926 aj = aij->j + ii[i]; 927 aa = aij->a + ii[i]; 928 929 for (j=0; j<n; j++) { 930 if (PetscAbsScalar(mask[*aj])) { 931 if (b) bb[*ridx] -= *aa*xx[*aj]; 932 *aa = 0.0; 933 } 934 aa++; 935 aj++; 936 } 937 ridx++; 938 } 939 } else { /* do not use compressed row format */ 940 m = l->B->rmap->n; 941 for (i=0; i<m; i++) { 942 n = ii[i+1] - ii[i]; 943 aj = aij->j + ii[i]; 944 aa = aij->a + ii[i]; 945 for (j=0; j<n; j++) { 946 if (PetscAbsScalar(mask[*aj])) { 947 if (b) bb[i] -= *aa*xx[*aj]; 948 *aa = 0.0; 949 } 950 aa++; 951 aj++; 952 } 953 } 954 } 955 if (x) { 956 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 957 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 958 } 959 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 960 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 961 ierr = PetscFree(lrows);CHKERRQ(ierr); 962 963 /* only change matrix nonzero state if pattern was allowed to be changed */ 964 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 965 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 966 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 967 } 968 PetscFunctionReturn(0); 969 } 970 971 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 972 { 973 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 974 PetscErrorCode ierr; 975 PetscInt nt; 976 VecScatter Mvctx = a->Mvctx; 977 978 PetscFunctionBegin; 979 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 980 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 981 982 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 984 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 985 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 989 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 990 { 991 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 992 PetscErrorCode ierr; 993 994 PetscFunctionBegin; 995 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 996 PetscFunctionReturn(0); 997 } 998 999 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1000 { 1001 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1002 PetscErrorCode ierr; 1003 VecScatter Mvctx = a->Mvctx; 1004 1005 PetscFunctionBegin; 1006 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1007 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1008 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1009 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1011 PetscFunctionReturn(0); 1012 } 1013 1014 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1015 { 1016 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1017 PetscErrorCode ierr; 1018 PetscBool merged; 1019 1020 PetscFunctionBegin; 1021 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1022 /* do nondiagonal part */ 1023 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1024 if (!merged) { 1025 /* send it on its way */ 1026 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1027 /* do local part */ 1028 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1029 /* receive remote parts: note this assumes the values are not actually */ 1030 /* added in yy until the next line, */ 1031 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1032 } else { 1033 /* do local part */ 1034 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1035 /* send it on its way */ 1036 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1037 /* values actually were received in the Begin() but we need to call this nop */ 1038 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1039 } 1040 PetscFunctionReturn(0); 1041 } 1042 1043 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1044 { 1045 MPI_Comm comm; 1046 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1047 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1048 IS Me,Notme; 1049 PetscErrorCode ierr; 1050 PetscInt M,N,first,last,*notme,i; 1051 PetscMPIInt size; 1052 1053 PetscFunctionBegin; 1054 /* Easy test: symmetric diagonal block */ 1055 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1056 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1057 if (!*f) PetscFunctionReturn(0); 1058 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1059 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1060 if (size == 1) PetscFunctionReturn(0); 1061 1062 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1063 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1064 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1065 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1066 for (i=0; i<first; i++) notme[i] = i; 1067 for (i=last; i<M; i++) notme[i-last+first] = i; 1068 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1069 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1070 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1071 Aoff = Aoffs[0]; 1072 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1073 Boff = Boffs[0]; 1074 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1075 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1076 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1077 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1078 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1079 ierr = PetscFree(notme);CHKERRQ(ierr); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1084 { 1085 PetscErrorCode ierr; 1086 1087 PetscFunctionBegin; 1088 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1093 { 1094 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 /* do nondiagonal part */ 1099 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1100 /* send it on its way */ 1101 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1102 /* do local part */ 1103 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1104 /* receive remote parts */ 1105 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 /* 1110 This only works correctly for square matrices where the subblock A->A is the 1111 diagonal block 1112 */ 1113 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1114 { 1115 PetscErrorCode ierr; 1116 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1117 1118 PetscFunctionBegin; 1119 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1120 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1121 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1132 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1137 { 1138 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1139 PetscErrorCode ierr; 1140 1141 PetscFunctionBegin; 1142 #if defined(PETSC_USE_LOG) 1143 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1144 #endif 1145 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1146 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1147 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1148 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1149 #if defined(PETSC_USE_CTABLE) 1150 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1151 #else 1152 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1153 #endif 1154 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1155 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1156 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1157 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1158 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1159 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1160 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1161 1162 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1171 #if defined(PETSC_HAVE_ELEMENTAL) 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1173 #endif 1174 #if defined(PETSC_HAVE_HYPRE) 1175 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1177 #endif 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1179 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1184 { 1185 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1186 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1187 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1188 PetscErrorCode ierr; 1189 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1190 int fd; 1191 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1192 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1193 PetscScalar *column_values; 1194 PetscInt message_count,flowcontrolcount; 1195 FILE *file; 1196 1197 PetscFunctionBegin; 1198 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1199 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1200 nz = A->nz + B->nz; 1201 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1202 if (!rank) { 1203 header[0] = MAT_FILE_CLASSID; 1204 header[1] = mat->rmap->N; 1205 header[2] = mat->cmap->N; 1206 1207 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1208 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1209 /* get largest number of rows any processor has */ 1210 rlen = mat->rmap->n; 1211 range = mat->rmap->range; 1212 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1213 } else { 1214 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1215 rlen = mat->rmap->n; 1216 } 1217 1218 /* 
load up the local row counts */ 1219 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1220 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1221 1222 /* store the row lengths to the file */ 1223 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1224 if (!rank) { 1225 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1226 for (i=1; i<size; i++) { 1227 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1228 rlen = range[i+1] - range[i]; 1229 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1230 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1231 } 1232 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1233 } else { 1234 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1235 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1236 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1237 } 1238 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1239 1240 /* load up the local column indices */ 1241 nzmax = nz; /* th processor needs space a largest processor needs */ 1242 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1243 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1244 cnt = 0; 1245 for (i=0; i<mat->rmap->n; i++) { 1246 for (j=B->i[i]; j<B->i[i+1]; j++) { 1247 if ((col = garray[B->j[j]]) > cstart) break; 1248 column_indices[cnt++] = col; 1249 } 1250 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1251 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1252 } 1253 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1254 1255 /* store the column indices to the file */ 1256 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1257 if (!rank) { 1258 MPI_Status status; 1259 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1260 for (i=1; i<size; i++) { 1261 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1262 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1263 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1264 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1265 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1266 } 1267 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1268 } else { 1269 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1270 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1271 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1272 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1273 } 1274 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1275 1276 /* load up the local column values */ 1277 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1278 cnt = 0; 1279 for (i=0; i<mat->rmap->n; i++) { 1280 for 
(j=B->i[i]; j<B->i[i+1]; j++) { 1281 if (garray[B->j[j]] > cstart) break; 1282 column_values[cnt++] = B->a[j]; 1283 } 1284 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1285 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1286 } 1287 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1288 1289 /* store the column values to the file */ 1290 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1291 if (!rank) { 1292 MPI_Status status; 1293 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1294 for (i=1; i<size; i++) { 1295 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1296 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1297 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1298 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1299 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1300 } 1301 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1302 } else { 1303 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1304 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1305 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1306 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1307 } 1308 ierr = PetscFree(column_values);CHKERRQ(ierr); 1309 1310 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1311 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #include <petscdraw.h> 1316 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1317 { 1318 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1319 PetscErrorCode ierr; 1320 PetscMPIInt rank = aij->rank,size = aij->size; 1321 PetscBool isdraw,iascii,isbinary; 1322 PetscViewer sviewer; 1323 PetscViewerFormat format; 1324 1325 PetscFunctionBegin; 1326 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1327 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1328 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1329 if (iascii) { 1330 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1331 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1332 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1333 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1334 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1335 for (i=0; i<(PetscInt)size; i++) { 1336 nmax = PetscMax(nmax,nz[i]); 1337 nmin = PetscMin(nmin,nz[i]); 1338 navg += nz[i]; 1339 } 1340 ierr = PetscFree(nz);CHKERRQ(ierr); 1341 navg = navg/size; 1342 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1346 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1347 MatInfo info; 1348 
PetscBool inodes; 1349 1350 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1351 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1352 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1353 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1354 if (!inodes) { 1355 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1356 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1357 } else { 1358 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1359 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1360 } 1361 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1362 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1363 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1364 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1365 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1366 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1367 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1368 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1371 PetscInt inodecount,inodelimit,*inodes; 1372 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1373 if (inodes) { 1374 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1375 } else { 1376 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1377 } 1378 PetscFunctionReturn(0); 1379 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1380 PetscFunctionReturn(0); 1381 } 1382 } else if (isbinary) { 1383 if (size == 1) { 1384 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1385 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1386 } else { 1387 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1388 } 1389 PetscFunctionReturn(0); 1390 } else if (isdraw) { 1391 PetscDraw draw; 1392 PetscBool isnull; 1393 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1394 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1395 if (isnull) PetscFunctionReturn(0); 1396 } 1397 1398 { 1399 /* assemble the entire matrix onto first processor. 
*/ 1400 Mat A; 1401 Mat_SeqAIJ *Aloc; 1402 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1403 MatScalar *a; 1404 1405 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1406 if (!rank) { 1407 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1408 } else { 1409 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1410 } 1411 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1412 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1413 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1414 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1415 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1416 1417 /* copy over the A part */ 1418 Aloc = (Mat_SeqAIJ*)aij->A->data; 1419 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1420 row = mat->rmap->rstart; 1421 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1422 for (i=0; i<m; i++) { 1423 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1424 row++; 1425 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1426 } 1427 aj = Aloc->j; 1428 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1429 1430 /* copy over the B part */ 1431 Aloc = (Mat_SeqAIJ*)aij->B->data; 1432 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1433 row = mat->rmap->rstart; 1434 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1435 ct = cols; 1436 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1437 for (i=0; i<m; i++) { 1438 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1439 row++; 1440 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1441 } 1442 ierr = PetscFree(ct);CHKERRQ(ierr); 1443 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1444 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1445 /* 1446 Everyone has to call to draw the matrix since the graphics waits are 1447 synchronized across all processors that share the PetscDraw object 1448 */ 1449 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1450 if (!rank) { 1451 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1452 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1453 } 1454 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1455 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1456 ierr = MatDestroy(&A);CHKERRQ(ierr); 1457 } 1458 PetscFunctionReturn(0); 1459 } 1460 1461 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1462 { 1463 PetscErrorCode ierr; 1464 PetscBool iascii,isdraw,issocket,isbinary; 1465 1466 PetscFunctionBegin; 1467 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1468 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1469 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1470 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1471 if (iascii || isdraw || isbinary || issocket) { 1472 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1473 } 1474 PetscFunctionReturn(0); 1475 } 1476 1477 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1478 { 1479 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1480 PetscErrorCode ierr; 1481 Vec bb1 = 0; 1482 PetscBool hasop; 1483 
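/*
   Note (added, schematic only): the "local" SOR variants below never relax across process
   boundaries.  Each outer iteration freezes the off-process part of x, moves its
   contribution to the right-hand side, and then runs SOR on the local diagonal block:

     x_ghost = scatter(x)                   (VecScatterBegin/End with Mvctx)
     bb1     = bb - B*x_ghost               (lvec scaled by -1, then MatMultAdd with B)
     x       = SOR(A, bb1, omega, lits)     (sweep restricted to the on-diagonal block A)

   Truly global parallel SOR is not supported and is rejected at the end of the routine.
*/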
1484 PetscFunctionBegin; 1485 if (flag == SOR_APPLY_UPPER) { 1486 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1487 PetscFunctionReturn(0); 1488 } 1489 1490 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1491 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1492 } 1493 1494 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1495 if (flag & SOR_ZERO_INITIAL_GUESS) { 1496 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1497 its--; 1498 } 1499 1500 while (its--) { 1501 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1502 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 1504 /* update rhs: bb1 = bb - B*x */ 1505 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1506 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1507 1508 /* local sweep */ 1509 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1510 } 1511 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1512 if (flag & SOR_ZERO_INITIAL_GUESS) { 1513 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1514 its--; 1515 } 1516 while (its--) { 1517 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1518 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 1520 /* update rhs: bb1 = bb - B*x */ 1521 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1522 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1523 1524 /* local sweep */ 1525 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1526 } 1527 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1528 if (flag & SOR_ZERO_INITIAL_GUESS) { 1529 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1530 its--; 1531 } 1532 while (its--) { 1533 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1534 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1535 1536 /* update rhs: bb1 = bb - B*x */ 1537 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1538 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1539 1540 /* local sweep */ 1541 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1542 } 1543 } else if (flag & SOR_EISENSTAT) { 1544 Vec xx1; 1545 1546 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1547 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1548 1549 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1550 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1551 if (!mat->diag) { 1552 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1553 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1554 } 1555 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1556 if (hasop) { 1557 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1558 } else { 1559 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1560 } 1561 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1562 1563 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1564 1565 /* local sweep */ 1566 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1567 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1568 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1569 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1570 1571 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1572 1573 matin->factorerrortype = mat->A->factorerrortype; 1574 PetscFunctionReturn(0); 1575 } 1576 1577 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1578 { 1579 Mat aA,aB,Aperm; 1580 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1581 PetscScalar *aa,*ba; 1582 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1583 PetscSF rowsf,sf; 1584 IS parcolp = NULL; 1585 PetscBool done; 1586 PetscErrorCode ierr; 1587 1588 PetscFunctionBegin; 1589 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1590 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1591 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1592 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1593 1594 /* Invert row permutation to find out where my rows should go */ 1595 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1596 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1597 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1598 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1599 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1600 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1601 1602 /* Invert column permutation to find out where my columns should go */ 1603 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1604 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1605 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1606 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1607 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1608 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1609 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1610 1611 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1612 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1613 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1614 1615 /* Find out where my gcols should go */ 1616 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1617 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1618 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1619 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1620 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1621 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1622 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1623 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1624 1625 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1626 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1627 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1628 for (i=0; i<m; i++) { 1629 PetscInt row = rdest[i],rowner; 1630 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1631 for (j=ai[i]; j<ai[i+1]; j++) { 1632 PetscInt cowner,col = cdest[aj[j]]; 1633 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1634 if (rowner == cowner) dnnz[i]++; 1635 else onnz[i]++; 1636 } 1637 for (j=bi[i]; j<bi[i+1]; j++) { 1638 PetscInt cowner,col = gcdest[bj[j]]; 1639 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1640 if (rowner == cowner) dnnz[i]++; 1641 else onnz[i]++; 1642 } 1643 } 1644 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1645 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1646 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1647 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1648 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1649 1650 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1651 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1652 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1653 for (i=0; i<m; i++) { 1654 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1655 PetscInt j0,rowlen; 1656 rowlen = ai[i+1] - ai[i]; 1657 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1658 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1659 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1660 } 1661 rowlen = bi[i+1] - bi[i]; 1662 for (j0=j=0; j<rowlen; j0=j) { 1663 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1664 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1665 } 1666 } 1667 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1668 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1669 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1670 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1671 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1672 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1673 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1674 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1675 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1676 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1677 *B = Aperm; 1678 PetscFunctionReturn(0); 1679 } 1680 1681 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1682 { 1683 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1684 PetscErrorCode ierr; 1685 1686 PetscFunctionBegin; 1687 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1688 if (ghosts) *ghosts = aij->garray; 1689 PetscFunctionReturn(0); 1690 } 1691 1692 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1693 { 1694 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1695 Mat A = mat->A,B = mat->B; 1696 PetscErrorCode ierr; 1697 PetscReal isend[5],irecv[5]; 1698 1699 PetscFunctionBegin; 1700 info->block_size = 1.0; 1701 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1702 1703 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1704 isend[3] = info->memory; isend[4] = info->mallocs; 1705 1706 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1707 1708 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1709 isend[3] += info->memory; isend[4] += info->mallocs; 1710 if (flag == MAT_LOCAL) { 1711 info->nz_used = isend[0]; 1712 info->nz_allocated = isend[1]; 1713 info->nz_unneeded = isend[2]; 1714 info->memory = isend[3]; 1715 info->mallocs = 
isend[4]; 1716 } else if (flag == MAT_GLOBAL_MAX) { 1717 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1718 1719 info->nz_used = irecv[0]; 1720 info->nz_allocated = irecv[1]; 1721 info->nz_unneeded = irecv[2]; 1722 info->memory = irecv[3]; 1723 info->mallocs = irecv[4]; 1724 } else if (flag == MAT_GLOBAL_SUM) { 1725 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1726 1727 info->nz_used = irecv[0]; 1728 info->nz_allocated = irecv[1]; 1729 info->nz_unneeded = irecv[2]; 1730 info->memory = irecv[3]; 1731 info->mallocs = irecv[4]; 1732 } 1733 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1734 info->fill_ratio_needed = 0; 1735 info->factor_mallocs = 0; 1736 PetscFunctionReturn(0); 1737 } 1738 1739 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1740 { 1741 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1742 PetscErrorCode ierr; 1743 1744 PetscFunctionBegin; 1745 switch (op) { 1746 case MAT_NEW_NONZERO_LOCATIONS: 1747 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1748 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1749 case MAT_KEEP_NONZERO_PATTERN: 1750 case MAT_NEW_NONZERO_LOCATION_ERR: 1751 case MAT_USE_INODES: 1752 case MAT_IGNORE_ZERO_ENTRIES: 1753 MatCheckPreallocated(A,1); 1754 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1755 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1756 break; 1757 case MAT_ROW_ORIENTED: 1758 MatCheckPreallocated(A,1); 1759 a->roworiented = flg; 1760 1761 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1762 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1763 break; 1764 case MAT_NEW_DIAGONALS: 1765 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1766 break; 1767 case MAT_IGNORE_OFF_PROC_ENTRIES: 1768 a->donotstash = flg; 1769 break; 1770 case MAT_SPD: 1771 A->spd_set = PETSC_TRUE; 1772 A->spd = flg; 1773 if (flg) { 1774 A->symmetric = PETSC_TRUE; 1775 A->structurally_symmetric = PETSC_TRUE; 1776 A->symmetric_set = PETSC_TRUE; 1777 A->structurally_symmetric_set = PETSC_TRUE; 1778 } 1779 break; 1780 case MAT_SYMMETRIC: 1781 MatCheckPreallocated(A,1); 1782 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1783 break; 1784 case MAT_STRUCTURALLY_SYMMETRIC: 1785 MatCheckPreallocated(A,1); 1786 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1787 break; 1788 case MAT_HERMITIAN: 1789 MatCheckPreallocated(A,1); 1790 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1791 break; 1792 case MAT_SYMMETRY_ETERNAL: 1793 MatCheckPreallocated(A,1); 1794 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1795 break; 1796 case MAT_SUBMAT_SINGLEIS: 1797 A->submat_singleis = flg; 1798 break; 1799 case MAT_STRUCTURE_ONLY: 1800 /* The option is handled directly by MatSetOption() */ 1801 break; 1802 default: 1803 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1804 } 1805 PetscFunctionReturn(0); 1806 } 1807 1808 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1809 { 1810 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1811 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1812 PetscErrorCode ierr; 1813 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1814 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1815 PetscInt *cmap,*idx_p; 1816 1817 PetscFunctionBegin; 1818 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1819 mat->getrowactive = PETSC_TRUE; 1820 1821 if (!mat->rowvalues && 
(idx || v)) { 1822 /* 1823 allocate enough space to hold information from the longest row. 1824 */ 1825 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1826 PetscInt max = 1,tmp; 1827 for (i=0; i<matin->rmap->n; i++) { 1828 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1829 if (max < tmp) max = tmp; 1830 } 1831 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1832 } 1833 1834 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1835 lrow = row - rstart; 1836 1837 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1838 if (!v) {pvA = 0; pvB = 0;} 1839 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1840 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1841 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1842 nztot = nzA + nzB; 1843 1844 cmap = mat->garray; 1845 if (v || idx) { 1846 if (nztot) { 1847 /* Sort by increasing column numbers, assuming A and B already sorted */ 1848 PetscInt imark = -1; 1849 if (v) { 1850 *v = v_p = mat->rowvalues; 1851 for (i=0; i<nzB; i++) { 1852 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1853 else break; 1854 } 1855 imark = i; 1856 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1857 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1858 } 1859 if (idx) { 1860 *idx = idx_p = mat->rowindices; 1861 if (imark > -1) { 1862 for (i=0; i<imark; i++) { 1863 idx_p[i] = cmap[cworkB[i]]; 1864 } 1865 } else { 1866 for (i=0; i<nzB; i++) { 1867 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1868 else break; 1869 } 1870 imark = i; 1871 } 1872 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1873 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1874 } 1875 } else { 1876 if (idx) *idx = 0; 1877 if (v) *v = 0; 1878 } 1879 } 1880 *nz = nztot; 1881 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1882 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1883 PetscFunctionReturn(0); 1884 } 1885 1886 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1887 { 1888 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1889 1890 PetscFunctionBegin; 1891 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1892 aij->getrowactive = PETSC_FALSE; 1893 PetscFunctionReturn(0); 1894 } 1895 1896 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1897 { 1898 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1899 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1900 PetscErrorCode ierr; 1901 PetscInt i,j,cstart = mat->cmap->rstart; 1902 PetscReal sum = 0.0; 1903 MatScalar *v; 1904 1905 PetscFunctionBegin; 1906 if (aij->size == 1) { 1907 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1908 } else { 1909 if (type == NORM_FROBENIUS) { 1910 v = amat->a; 1911 for (i=0; i<amat->nz; i++) { 1912 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1913 } 1914 v = bmat->a; 1915 for (i=0; i<bmat->nz; i++) { 1916 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1917 } 1918 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1919 *norm = PetscSqrtReal(*norm); 1920 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1921 } else if (type == NORM_1) { /* max column norm */ 1922 PetscReal *tmp,*tmp2; 1923 PetscInt *jj,*garray = aij->garray; 1924 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1925 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1926 *norm = 0.0; 1927 v = amat->a; jj = amat->j; 1928 for (j=0; j<amat->nz; j++) { 1929 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1930 } 1931 v = bmat->a; jj = bmat->j; 1932 for (j=0; j<bmat->nz; j++) { 1933 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1934 } 1935 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1936 for (j=0; j<mat->cmap->N; j++) { 1937 if (tmp2[j] > *norm) *norm = tmp2[j]; 1938 } 1939 ierr = PetscFree(tmp);CHKERRQ(ierr); 1940 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1941 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1942 } else if (type == NORM_INFINITY) { /* max row norm */ 1943 PetscReal ntemp = 0.0; 1944 for (j=0; j<aij->A->rmap->n; j++) { 1945 v = amat->a + amat->i[j]; 1946 sum = 0.0; 1947 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1948 sum += PetscAbsScalar(*v); v++; 1949 } 1950 v = bmat->a + bmat->i[j]; 1951 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1952 sum += PetscAbsScalar(*v); v++; 1953 } 1954 if (sum > ntemp) ntemp = sum; 1955 } 1956 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1957 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1958 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1959 } 1960 PetscFunctionReturn(0); 1961 } 1962 1963 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1964 { 1965 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1966 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1967 PetscErrorCode ierr; 1968 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1969 PetscInt cstart = A->cmap->rstart,ncol; 1970 Mat B; 1971 MatScalar *array; 1972 1973 PetscFunctionBegin; 1974 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1975 ai = Aloc->i; aj = Aloc->j; 1976 bi = Bloc->i; bj = Bloc->j; 1977 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1978 PetscInt *d_nnz,*g_nnz,*o_nnz; 1979 PetscSFNode *oloc; 1980 PETSC_UNUSED PetscSF sf; 1981 1982 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1983 /* compute d_nnz for preallocation */ 1984 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1985 for (i=0; i<ai[ma]; i++) { 1986 d_nnz[aj[i]]++; 1987 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1988 } 1989 /* compute local off-diagonal contributions */ 1990 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1991 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1992 /* map those to global */ 1993 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1994 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1995 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1996 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1997 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1998 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1999 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2000 2001 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2002 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2003 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2004 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2005 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2006 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2007 } else { 2008 B = *matout; 2009 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2010 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2011 } 2012 2013 /* copy over the A part */ 2014 array = Aloc->a; 2015 row = A->rmap->rstart; 2016 for (i=0; i<ma; i++) { 2017 ncol = ai[i+1]-ai[i]; 2018 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2019 row++; 2020 array += ncol; aj += ncol; 2021 } 2022 aj = Aloc->j; 2023 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2024 2025 /* copy over the B part */ 2026 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2027 array = Bloc->a; 2028 row = A->rmap->rstart; 2029 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2030 cols_tmp = cols; 2031 for (i=0; i<mb; i++) { 2032 ncol = bi[i+1]-bi[i]; 2033 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2034 row++; 2035 array += ncol; cols_tmp += ncol; 2036 } 2037 ierr = PetscFree(cols);CHKERRQ(ierr); 2038 2039 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2040 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2041 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2042 *matout = B; 2043 } else { 2044 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2045 } 2046 PetscFunctionReturn(0); 2047 } 2048 2049 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2050 { 2051 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2052 Mat a = aij->A,b = aij->B; 2053 PetscErrorCode ierr; 2054 PetscInt s1,s2,s3; 2055 2056 PetscFunctionBegin; 2057 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2058 if (rr) { 2059 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2060 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2061 /* Overlap communication with computation. 
*/ 2062 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2063 } 2064 if (ll) { 2065 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2066 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2067 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2068 } 2069 /* scale the diagonal block */ 2070 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2071 2072 if (rr) { 2073 /* Do a scatter end and then right scale the off-diagonal block */ 2074 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2075 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2076 } 2077 PetscFunctionReturn(0); 2078 } 2079 2080 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2081 { 2082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2083 PetscErrorCode ierr; 2084 2085 PetscFunctionBegin; 2086 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2087 PetscFunctionReturn(0); 2088 } 2089 2090 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2091 { 2092 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2093 Mat a,b,c,d; 2094 PetscBool flg; 2095 PetscErrorCode ierr; 2096 2097 PetscFunctionBegin; 2098 a = matA->A; b = matA->B; 2099 c = matB->A; d = matB->B; 2100 2101 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2102 if (flg) { 2103 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2104 } 2105 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2106 PetscFunctionReturn(0); 2107 } 2108 2109 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2110 { 2111 PetscErrorCode ierr; 2112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2113 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2114 2115 PetscFunctionBegin; 2116 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2117 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2118 /* because of the column compression in the off-processor part of the matrix a->B, 2119 the number of columns in a->B and b->B may be different, hence we cannot call 2120 the MatCopy() directly on the two parts. If need be, we can provide a more 2121 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2122 then copying the submatrices */ 2123 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2124 } else { 2125 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2126 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2127 } 2128 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2133 { 2134 PetscErrorCode ierr; 2135 2136 PetscFunctionBegin; 2137 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2138 PetscFunctionReturn(0); 2139 } 2140 2141 /* 2142 Computes the number of nonzeros per row needed for preallocation when X and Y 2143 have different nonzero structure. 
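   The routine below merges, row by row, the two sorted column lists of X and Y (translated
   to global indices through xltog and yltog) and counts the size of their union.  For
   example (illustrative values only): if row i of X has global columns {1,4,7} and row i
   of Y has {4,5}, the merged row has columns {1,4,5,7} and nnz[i] = 4.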
2144 */ 2145 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2146 { 2147 PetscInt i,j,k,nzx,nzy; 2148 2149 PetscFunctionBegin; 2150 /* Set the number of nonzeros in the new matrix */ 2151 for (i=0; i<m; i++) { 2152 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2153 nzx = xi[i+1] - xi[i]; 2154 nzy = yi[i+1] - yi[i]; 2155 nnz[i] = 0; 2156 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2157 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2158 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2159 nnz[i]++; 2160 } 2161 for (; k<nzy; k++) nnz[i]++; 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2167 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2168 { 2169 PetscErrorCode ierr; 2170 PetscInt m = Y->rmap->N; 2171 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2172 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2173 2174 PetscFunctionBegin; 2175 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2180 { 2181 PetscErrorCode ierr; 2182 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2183 PetscBLASInt bnz,one=1; 2184 Mat_SeqAIJ *x,*y; 2185 2186 PetscFunctionBegin; 2187 if (str == SAME_NONZERO_PATTERN) { 2188 PetscScalar alpha = a; 2189 x = (Mat_SeqAIJ*)xx->A->data; 2190 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2191 y = (Mat_SeqAIJ*)yy->A->data; 2192 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2193 x = (Mat_SeqAIJ*)xx->B->data; 2194 y = (Mat_SeqAIJ*)yy->B->data; 2195 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2196 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2197 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2198 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2199 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2200 } else { 2201 Mat B; 2202 PetscInt *nnz_d,*nnz_o; 2203 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2204 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2205 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2206 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2207 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2208 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2209 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2210 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2211 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2212 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2213 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2214 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2215 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2216 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2217 } 2218 PetscFunctionReturn(0); 2219 } 2220 2221 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2222 2223 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2224 { 2225 #if defined(PETSC_USE_COMPLEX) 2226 PetscErrorCode ierr; 2227 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data;
2228
2229   PetscFunctionBegin;
2230   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2231   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2232 #else
2233   PetscFunctionBegin;
2234 #endif
2235   PetscFunctionReturn(0);
2236 }
2237
2238 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2239 {
2240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2241   PetscErrorCode ierr;
2242
2243   PetscFunctionBegin;
2244   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2245   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2246   PetscFunctionReturn(0);
2247 }
2248
2249 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2250 {
2251   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2252   PetscErrorCode ierr;
2253
2254   PetscFunctionBegin;
2255   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2256   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2257   PetscFunctionReturn(0);
2258 }
2259
2260 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2261 {
2262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2263   PetscErrorCode ierr;
2264   PetscInt       i,*idxb = 0;
2265   PetscScalar    *va,*vb;
2266   Vec            vtmp;
2267
2268   PetscFunctionBegin;
2269   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2270   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2271   if (idx) {
2272     for (i=0; i<A->rmap->n; i++) {
2273       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2274     }
2275   }
2276
2277   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2278   if (idx) {
2279     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2280   }
2281   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2282   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2283
2284   for (i=0; i<A->rmap->n; i++) {
2285     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2286       va[i] = vb[i];
2287       if (idx) idx[i] = a->garray[idxb[i]];
2288     }
2289   }
2290
2291   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2292   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2293   ierr = PetscFree(idxb);CHKERRQ(ierr);
2294   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2295   PetscFunctionReturn(0);
2296 }
2297
2298 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2299 {
2300   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2301   PetscErrorCode ierr;
2302   PetscInt       i,*idxb = 0;
2303   PetscScalar    *va,*vb;
2304   Vec            vtmp;
2305
2306   PetscFunctionBegin;
2307   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2308   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2309   if (idx) {
2310     for (i=0; i<A->rmap->n; i++) {
2311       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2312     }
2313   }
2314
2315   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2316   if (idx) {
2317     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2318   }
2319   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2320   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2321
2322   for (i=0; i<A->rmap->n; i++) {
2323     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2324       va[i] = vb[i];
2325       if (idx) idx[i] = a->garray[idxb[i]];
2326     }
2327   }
2328
2329   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2330   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2331   ierr = PetscFree(idxb);CHKERRQ(ierr);
2332   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2333   PetscFunctionReturn(0);
2334 }
2335
2336 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2337 {
2338   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2339   PetscInt       n      = A->rmap->n;
2340   PetscInt       cstart = A->cmap->rstart;
2341   PetscInt       *cmap  = mat->garray;
2342   PetscInt       *diagIdx, *offdiagIdx;
2343   Vec            diagV, offdiagV;
2344   PetscScalar    *a, *diagA, *offdiagA;
2345   PetscInt       r;
2346   PetscErrorCode ierr;
2347
2348   PetscFunctionBegin;
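  /*
     Added note (schematic): the per-row result is obtained by querying the on-diagonal
     block A and the off-diagonal block B separately and then merging the two candidate
     values row by row in the loop below; a candidate coming from B has its local column
     index translated back to global numbering through garray (the cmap array), while a
     candidate from A is shifted by the first global column owned by this process (cstart).
  */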
2349   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2350   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2351   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2352   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2353   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2354   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2355   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2356   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2357   for (r = 0; r < n; ++r) {
2358     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2359       a[r]   = diagA[r];
2360       idx[r] = cstart + diagIdx[r];
2361     } else {
2362       a[r]   = offdiagA[r];
2363       idx[r] = cmap[offdiagIdx[r]];
2364     }
2365   }
2366   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2367   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2368   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2369   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2370   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2371   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2372   PetscFunctionReturn(0);
2373 }
2374
2375 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2376 {
2377   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2378   PetscInt       n      = A->rmap->n;
2379   PetscInt       cstart = A->cmap->rstart;
2380   PetscInt       *cmap  = mat->garray;
2381   PetscInt       *diagIdx, *offdiagIdx;
2382   Vec            diagV, offdiagV;
2383   PetscScalar    *a, *diagA, *offdiagA;
2384   PetscInt       r;
2385   PetscErrorCode ierr;
2386
2387   PetscFunctionBegin;
2388   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2389   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2390   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2391   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2392   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2393   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2394   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2395   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2396   for (r = 0; r < n; ++r) {
2397     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2398       a[r]   = diagA[r];
2399       idx[r] = cstart + diagIdx[r];
2400     } else {
2401       a[r]   = offdiagA[r];
2402       idx[r] = cmap[offdiagIdx[r]];
2403     }
2404   }
2405   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2406   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2407   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2408   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2409   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2410   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413
2414 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2415 {
2416   PetscErrorCode ierr;
2417   Mat            *dummy;
2418
2419   PetscFunctionBegin;
2420   ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2421   *newmat = *dummy;
2422   ierr = PetscFree(dummy);CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425
2426 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2427 {
2428   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2429   PetscErrorCode ierr;
2430
2431   PetscFunctionBegin;
2432   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2433   A->factorerrortype = a->A->factorerrortype;
2434   PetscFunctionReturn(0);
2435 }
2436
2437 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2438 {
2439   PetscErrorCode ierr;
2440   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2441
2442   PetscFunctionBegin;
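  /*
     Usage sketch (illustrative, not part of the original source): from user code a
     preallocated MPIAIJ matrix is filled with random entries roughly as follows.

       PetscRandom rctx;
       ierr = PetscRandomCreate(PetscObjectComm((PetscObject)x),&rctx);CHKERRQ(ierr);
       ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
       ierr = MatSetRandom(x,rctx);CHKERRQ(ierr);
       ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
  */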
2443   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2444   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2445   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2446   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   PetscFunctionReturn(0);
2448 }
2449
2450 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2451 {
2452   PetscFunctionBegin;
2453   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2454   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2455   PetscFunctionReturn(0);
2456 }
2457
2458 /*@
2459    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2460
2461    Collective on Mat
2462
2463    Input Parameters:
2464 +    A - the matrix
2465 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2466
2467    Level: advanced
2468
2469 @*/
2470 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2471 {
2472   PetscErrorCode ierr;
2473
2474   PetscFunctionBegin;
2475   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2476   PetscFunctionReturn(0);
2477 }
2478
2479 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2480 {
2481   PetscErrorCode ierr;
2482   PetscBool      sc = PETSC_FALSE,flg;
2483
2484   PetscFunctionBegin;
2485   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2486
2487   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2488   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2489   if (flg) {
2490     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2491   }
2492   ierr = PetscOptionsTail();CHKERRQ(ierr);
2493   PetscFunctionReturn(0);
2494 }
2495
2496 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2497 {
2498   PetscErrorCode ierr;
2499   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2500   Mat_SeqAIJ     *aij  = (Mat_SeqAIJ*)maij->A->data;
2501
2502   PetscFunctionBegin;
2503   if (!Y->preallocated) {
2504     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2505   } else if (!aij->nz) {
2506     PetscInt nonew = aij->nonew;
2507     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2508     aij->nonew = nonew;
2509   }
2510   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2511   PetscFunctionReturn(0);
2512 }
2513
2514 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
2515 {
2516   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2517   PetscErrorCode ierr;
2518
2519   PetscFunctionBegin;
2520   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2521   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2522   if (d) {
2523     PetscInt rstart;
2524     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2525     *d += rstart;
2526
2527   }
2528   PetscFunctionReturn(0);
2529 }
2530
2531
2532 /* -------------------------------------------------------------------*/
2533 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2534                                        MatGetRow_MPIAIJ,
2535                                        MatRestoreRow_MPIAIJ,
2536                                        MatMult_MPIAIJ,
2537                                 /* 4*/ MatMultAdd_MPIAIJ,
2538                                        MatMultTranspose_MPIAIJ,
2539                                        MatMultTransposeAdd_MPIAIJ,
2540                                        0,
2541                                        0,
2542                                        0,
2543                                 /*10*/ 0,
2544                                        0,
2545                                        0,
2546                                        MatSOR_MPIAIJ,
2547                                        MatTranspose_MPIAIJ,
2548                                 /*15*/ MatGetInfo_MPIAIJ,
2549                                        MatEqual_MPIAIJ,
2550
MatGetDiagonal_MPIAIJ, 2551 MatDiagonalScale_MPIAIJ, 2552 MatNorm_MPIAIJ, 2553 /*20*/ MatAssemblyBegin_MPIAIJ, 2554 MatAssemblyEnd_MPIAIJ, 2555 MatSetOption_MPIAIJ, 2556 MatZeroEntries_MPIAIJ, 2557 /*24*/ MatZeroRows_MPIAIJ, 2558 0, 2559 0, 2560 0, 2561 0, 2562 /*29*/ MatSetUp_MPIAIJ, 2563 0, 2564 0, 2565 MatGetDiagonalBlock_MPIAIJ, 2566 0, 2567 /*34*/ MatDuplicate_MPIAIJ, 2568 0, 2569 0, 2570 0, 2571 0, 2572 /*39*/ MatAXPY_MPIAIJ, 2573 MatCreateSubMatrices_MPIAIJ, 2574 MatIncreaseOverlap_MPIAIJ, 2575 MatGetValues_MPIAIJ, 2576 MatCopy_MPIAIJ, 2577 /*44*/ MatGetRowMax_MPIAIJ, 2578 MatScale_MPIAIJ, 2579 MatShift_MPIAIJ, 2580 MatDiagonalSet_MPIAIJ, 2581 MatZeroRowsColumns_MPIAIJ, 2582 /*49*/ MatSetRandom_MPIAIJ, 2583 0, 2584 0, 2585 0, 2586 0, 2587 /*54*/ MatFDColoringCreate_MPIXAIJ, 2588 0, 2589 MatSetUnfactored_MPIAIJ, 2590 MatPermute_MPIAIJ, 2591 0, 2592 /*59*/ MatCreateSubMatrix_MPIAIJ, 2593 MatDestroy_MPIAIJ, 2594 MatView_MPIAIJ, 2595 0, 2596 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2597 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2598 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2599 0, 2600 0, 2601 0, 2602 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2603 MatGetRowMinAbs_MPIAIJ, 2604 0, 2605 0, 2606 0, 2607 0, 2608 /*75*/ MatFDColoringApply_AIJ, 2609 MatSetFromOptions_MPIAIJ, 2610 0, 2611 0, 2612 MatFindZeroDiagonals_MPIAIJ, 2613 /*80*/ 0, 2614 0, 2615 0, 2616 /*83*/ MatLoad_MPIAIJ, 2617 MatIsSymmetric_MPIAIJ, 2618 0, 2619 0, 2620 0, 2621 0, 2622 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2623 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2624 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2625 MatPtAP_MPIAIJ_MPIAIJ, 2626 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2627 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2628 0, 2629 0, 2630 0, 2631 0, 2632 /*99*/ 0, 2633 0, 2634 0, 2635 MatConjugate_MPIAIJ, 2636 0, 2637 /*104*/MatSetValuesRow_MPIAIJ, 2638 MatRealPart_MPIAIJ, 2639 MatImaginaryPart_MPIAIJ, 2640 0, 2641 0, 2642 /*109*/0, 2643 0, 2644 MatGetRowMin_MPIAIJ, 2645 0, 2646 MatMissingDiagonal_MPIAIJ, 2647 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2648 0, 2649 MatGetGhosts_MPIAIJ, 2650 0, 2651 0, 2652 /*119*/0, 2653 0, 2654 0, 2655 0, 2656 MatGetMultiProcBlock_MPIAIJ, 2657 /*124*/MatFindNonzeroRows_MPIAIJ, 2658 MatGetColumnNorms_MPIAIJ, 2659 MatInvertBlockDiagonal_MPIAIJ, 2660 0, 2661 MatCreateSubMatricesMPI_MPIAIJ, 2662 /*129*/0, 2663 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2664 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2665 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2666 0, 2667 /*134*/0, 2668 0, 2669 MatRARt_MPIAIJ_MPIAIJ, 2670 0, 2671 0, 2672 /*139*/MatSetBlockSizes_MPIAIJ, 2673 0, 2674 0, 2675 MatFDColoringSetUp_MPIXAIJ, 2676 MatFindOffBlockDiagonalEntries_MPIAIJ, 2677 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2678 }; 2679 2680 /* ----------------------------------------------------------------------------------------*/ 2681 2682 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2683 { 2684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2685 PetscErrorCode ierr; 2686 2687 PetscFunctionBegin; 2688 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2689 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2690 PetscFunctionReturn(0); 2691 } 2692 2693 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2694 { 2695 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2696 PetscErrorCode ierr; 2697 2698 PetscFunctionBegin; 2699 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2700 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2701 PetscFunctionReturn(0); 2702 } 2703 2704 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2705 { 2706 Mat_MPIAIJ *b; 2707 PetscErrorCode ierr; 2708 2709 PetscFunctionBegin; 2710 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2711 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2712 b = (Mat_MPIAIJ*)B->data; 2713 2714 #if defined(PETSC_USE_CTABLE) 2715 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2716 #else 2717 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2718 #endif 2719 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2720 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2721 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2722 2723 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2724 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2725 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2726 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2727 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2728 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2729 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2730 2731 if (!B->preallocated) { 2732 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2733 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2734 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2735 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2736 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2737 } 2738 2739 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2740 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2741 B->preallocated = PETSC_TRUE; 2742 B->was_assembled = PETSC_FALSE; 2743 B->assembled = PETSC_FALSE;; 2744 PetscFunctionReturn(0); 2745 } 2746 2747 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2748 { 2749 Mat_MPIAIJ *b; 2750 PetscErrorCode ierr; 2751 2752 PetscFunctionBegin; 2753 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2754 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2755 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2756 b = (Mat_MPIAIJ*)B->data; 2757 2758 #if defined(PETSC_USE_CTABLE) 2759 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2760 #else 2761 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2762 #endif 2763 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2764 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2765 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2766 2767 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2768 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2769 B->preallocated = PETSC_TRUE; 2770 B->was_assembled = PETSC_FALSE; 2771 B->assembled = PETSC_FALSE; 2772 PetscFunctionReturn(0); 2773 } 2774 2775 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2776 { 2777 Mat mat; 2778 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2779 PetscErrorCode ierr; 2780 2781 PetscFunctionBegin; 2782 *newmat = 0; 2783 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2784 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2785 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2786 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2787 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2788 a = (Mat_MPIAIJ*)mat->data; 2789 2790 mat->factortype = matin->factortype; 2791 mat->assembled = PETSC_TRUE; 2792 mat->insertmode = NOT_SET_VALUES; 2793 mat->preallocated = PETSC_TRUE; 2794 2795 a->size = oldmat->size; 2796 a->rank = oldmat->rank; 2797 a->donotstash 
= oldmat->donotstash; 2798 a->roworiented = oldmat->roworiented; 2799 a->rowindices = 0; 2800 a->rowvalues = 0; 2801 a->getrowactive = PETSC_FALSE; 2802 2803 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2804 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2805 2806 if (oldmat->colmap) { 2807 #if defined(PETSC_USE_CTABLE) 2808 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2809 #else 2810 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2811 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2812 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2813 #endif 2814 } else a->colmap = 0; 2815 if (oldmat->garray) { 2816 PetscInt len; 2817 len = oldmat->B->cmap->n; 2818 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2819 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2820 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2821 } else a->garray = 0; 2822 2823 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2824 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2825 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2826 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2827 2828 if (oldmat->Mvctx_mpi1) { 2829 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2830 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2831 } 2832 2833 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2834 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2835 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2836 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2837 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2838 *newmat = mat; 2839 PetscFunctionReturn(0); 2840 } 2841 2842 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2843 { 2844 PetscScalar *vals,*svals; 2845 MPI_Comm comm; 2846 PetscErrorCode ierr; 2847 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2848 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2849 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2850 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2851 PetscInt cend,cstart,n,*rowners; 2852 int fd; 2853 PetscInt bs = newMat->rmap->bs; 2854 2855 PetscFunctionBegin; 2856 /* force binary viewer to load .info file if it has not yet done so */ 2857 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2858 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2859 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2860 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2861 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2862 if (!rank) { 2863 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2864 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2865 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2866 } 2867 2868 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2869 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2870 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2871 if (bs < 0) bs = 1; 2872 2873 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2874 M = header[1]; N = header[2]; 2875 2876 /* If global sizes are set, check if they are consistent with that given in the file */ 2877 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2878 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2879 2880 /* determine ownership of all (block) rows */ 2881 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2882 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2883 else m = newMat->rmap->n; /* Set by user */ 2884 2885 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2886 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2887 2888 /* First process needs enough room for process with most rows */ 2889 if (!rank) { 2890 mmax = rowners[1]; 2891 for (i=2; i<=size; i++) { 2892 mmax = PetscMax(mmax, rowners[i]); 2893 } 2894 } else mmax = -1; /* unused, but compilers complain */ 2895 2896 rowners[0] = 0; 2897 for (i=2; i<=size; i++) { 2898 rowners[i] += rowners[i-1]; 2899 } 2900 rstart = rowners[rank]; 2901 rend = rowners[rank+1]; 2902 2903 /* distribute row lengths to all processors */ 2904 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2905 if (!rank) { 2906 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2907 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2908 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2909 for (j=0; j<m; j++) { 2910 procsnz[0] += ourlens[j]; 2911 } 2912 for (i=1; i<size; i++) { 2913 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2914 /* calculate the number of nonzeros on each processor */ 2915 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2916 procsnz[i] += rowlengths[j]; 2917 } 2918 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2919 } 2920 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2921 } else { 2922 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2923 } 2924 2925 if (!rank) { 2926 /* determine max buffer needed and allocate it */ 2927 maxnz = 0; 2928 for (i=0; i<size; i++) { 2929 maxnz = PetscMax(maxnz,procsnz[i]); 2930 } 2931 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2932 2933 /* read in my part of the matrix column indices */ 2934 nz = procsnz[0]; 2935 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2936 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2937 2938 /* read in every one elses and ship off */ 2939 for (i=1; i<size; i++) { 2940 nz = procsnz[i]; 2941 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2942 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2943 } 2944 ierr = PetscFree(cols);CHKERRQ(ierr); 2945 } else { 2946 /* determine buffer space needed for message */ 2947 nz = 0; 2948 for (i=0; i<m; i++) { 2949 nz += ourlens[i]; 2950 } 2951 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2952 2953 /* receive message of column indices*/ 2954 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2955 } 2956 2957 /* 
determine column ownership if matrix is not square */ 2958 if (N != M) { 2959 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2960 else n = newMat->cmap->n; 2961 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2962 cstart = cend - n; 2963 } else { 2964 cstart = rstart; 2965 cend = rend; 2966 n = cend - cstart; 2967 } 2968 2969 /* loop over local rows, determining number of off diagonal entries */ 2970 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2971 jj = 0; 2972 for (i=0; i<m; i++) { 2973 for (j=0; j<ourlens[i]; j++) { 2974 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2975 jj++; 2976 } 2977 } 2978 2979 for (i=0; i<m; i++) { 2980 ourlens[i] -= offlens[i]; 2981 } 2982 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2983 2984 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2985 2986 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2987 2988 for (i=0; i<m; i++) { 2989 ourlens[i] += offlens[i]; 2990 } 2991 2992 if (!rank) { 2993 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2994 2995 /* read in my part of the matrix numerical values */ 2996 nz = procsnz[0]; 2997 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2998 2999 /* insert into matrix */ 3000 jj = rstart; 3001 smycols = mycols; 3002 svals = vals; 3003 for (i=0; i<m; i++) { 3004 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3005 smycols += ourlens[i]; 3006 svals += ourlens[i]; 3007 jj++; 3008 } 3009 3010 /* read in other processors and ship out */ 3011 for (i=1; i<size; i++) { 3012 nz = procsnz[i]; 3013 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3014 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3015 } 3016 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3017 } else { 3018 /* receive numeric values */ 3019 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3020 3021 /* receive message of values*/ 3022 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3023 3024 /* insert into matrix */ 3025 jj = rstart; 3026 smycols = mycols; 3027 svals = vals; 3028 for (i=0; i<m; i++) { 3029 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3030 smycols += ourlens[i]; 3031 svals += ourlens[i]; 3032 jj++; 3033 } 3034 } 3035 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3036 ierr = PetscFree(vals);CHKERRQ(ierr); 3037 ierr = PetscFree(mycols);CHKERRQ(ierr); 3038 ierr = PetscFree(rowners);CHKERRQ(ierr); 3039 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3040 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3041 PetscFunctionReturn(0); 3042 } 3043 3044 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3045 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3046 { 3047 PetscErrorCode ierr; 3048 IS iscol_local; 3049 PetscBool isstride; 3050 PetscMPIInt lisstride=0,gisstride; 3051 3052 PetscFunctionBegin; 3053 /* check if we are grabbing all columns*/ 3054 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3055 3056 if (isstride) { 3057 PetscInt start,len,mstart,mlen; 3058 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3059 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3060 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3061 if (mstart == start && mlen-mstart == len) lisstride = 1; 3062 } 3063 3064 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3065 if (gisstride) { 3066 PetscInt N; 3067 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3068 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3069 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3070 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3071 } else { 3072 PetscInt cbs; 3073 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3074 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3075 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3076 } 3077 3078 *isseq = iscol_local; 3079 PetscFunctionReturn(0); 3080 } 3081 3082 /* 3083 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3084 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3085 3086 Input Parameters: 3087 mat - matrix 3088 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3089 i.e., mat->rstart <= isrow[i] < mat->rend 3090 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3091 i.e., mat->cstart <= iscol[i] < mat->cend 3092 Output Parameter: 3093 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3094 iscol_o - sequential column index set for retrieving mat->B 3095 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3096 */ 3097 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3098 { 3099 PetscErrorCode ierr; 3100 Vec x,cmap; 3101 const PetscInt *is_idx; 3102 PetscScalar *xarray,*cmaparray; 3103 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3104 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3105 Mat B=a->B; 3106 Vec lvec=a->lvec,lcmap; 3107 PetscInt i,cstart,cend,Bn=B->cmap->N; 3108 MPI_Comm comm; 3109 VecScatter Mvctx=a->Mvctx; 3110 3111 PetscFunctionBegin; 3112 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3113 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3114 3115 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3116 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3117 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3118 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3119 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3120 3121 /* Get start indices */ 3122 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3123 isstart -= ncols; 3124 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3125 3126 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3127 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3128 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3129 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3130 for (i=0; i<ncols; i++) { 3131 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3132 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3133 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3134 } 3135 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3136 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3137 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3138 3139 /* Get iscol_d */ 3140 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3141 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3142 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3143 3144 /* Get isrow_d */ 3145 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3146 rstart = mat->rmap->rstart; 3147 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3148 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3149 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3150 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3151 3152 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3153 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3154 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3155 3156 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3157 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3158 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3159 3160 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3161 3162 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3163 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3164 3165 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3166 /* off-process column indices */ 3167 count = 0; 3168 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3169 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3170 3171 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3172 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3173 for (i=0; i<Bn; i++) { 3174 if (PetscRealPart(xarray[i]) > -1.0) { 3175 idx[count] = i; /* local column index in off-diagonal part B */ 3176 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3177 count++; 3178 } 3179 } 3180 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3181 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3182 3183 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3184 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3185 3186 ierr = PetscFree(idx);CHKERRQ(ierr); 3187 *garray = cmap1; 3188 3189 ierr = VecDestroy(&x);CHKERRQ(ierr); 3190 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3191 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3192 PetscFunctionReturn(0); 3193 } 3194 3195 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3196 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3197 { 3198 PetscErrorCode ierr; 3199 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3200 Mat M = NULL; 3201 MPI_Comm comm; 3202 IS iscol_d,isrow_d,iscol_o; 3203 Mat Asub = NULL,Bsub = NULL; 3204 PetscInt n; 3205 3206 PetscFunctionBegin; 3207 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3208 3209 if (call == MAT_REUSE_MATRIX) { 3210 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3211 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3212 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3213 3214 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3215 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3216 3217 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3218 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3219 3220 /* Update diagonal and off-diagonal portions of submat */ 3221 asub = (Mat_MPIAIJ*)(*submat)->data; 3222 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3223 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3224 if (n) { 3225 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3226 } 3227 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3228 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3229 3230 } else { /* call == MAT_INITIAL_MATRIX) */ 3231 const PetscInt *garray; 3232 PetscInt BsubN; 3233 3234 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3235 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3236 3237 /* Create local submatrices Asub and Bsub */ 3238 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3239 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3240 3241 /* Create submatrix M */ 3242 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3243 3244 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3245 asub = (Mat_MPIAIJ*)M->data; 3246 3247 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3248 n = asub->B->cmap->N; 3249 if (BsubN > n) { 3250 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3251 const PetscInt *idx; 3252 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3253 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3254 3255 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3256 j = 0; 3257 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3258 for (i=0; i<n; i++) { 3259 if (j >= BsubN) break; 3260 while (subgarray[i] > garray[j]) j++; 3261 3262 if (subgarray[i] == garray[j]) { 3263 idx_new[i] = idx[j++]; 3264 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3265 } 3266 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3267 3268 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3269 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3270 3271 } else if (BsubN < n) { 3272 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3273 } 3274 3275 ierr = PetscFree(garray);CHKERRQ(ierr); 3276 *submat = M; 3277 3278 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3279 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3280 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3281 3282 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3283 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3284 3285 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3286 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3287 } 3288 PetscFunctionReturn(0); 3289 } 3290 3291 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3292 { 3293 PetscErrorCode ierr; 3294 IS iscol_local=NULL,isrow_d; 3295 PetscInt csize; 3296 PetscInt n,i,j,start,end; 3297 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3298 MPI_Comm comm; 3299 3300 PetscFunctionBegin; 3301 /* If isrow has same processor distribution as mat, 3302 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3303 if (call == MAT_REUSE_MATRIX) { 3304 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3305 if (isrow_d) { 3306 sameRowDist = PETSC_TRUE; 3307 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3308 } else { 3309 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3310 if (iscol_local) { 3311 sameRowDist = PETSC_TRUE; 3312 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3313 } 3314 } 3315 } else { 3316 /* Check if isrow has same processor distribution as mat */ 3317 sameDist[0] 
= PETSC_FALSE; 3318 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3319 if (!n) { 3320 sameDist[0] = PETSC_TRUE; 3321 } else { 3322 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3323 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3324 if (i >= start && j < end) { 3325 sameDist[0] = PETSC_TRUE; 3326 } 3327 } 3328 3329 /* Check if iscol has same processor distribution as mat */ 3330 sameDist[1] = PETSC_FALSE; 3331 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3332 if (!n) { 3333 sameDist[1] = PETSC_TRUE; 3334 } else { 3335 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3336 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3337 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3338 } 3339 3340 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3341 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3342 sameRowDist = tsameDist[0]; 3343 } 3344 3345 if (sameRowDist) { 3346 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3347 /* isrow and iscol have same processor distribution as mat */ 3348 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3349 PetscFunctionReturn(0); 3350 } else { /* sameRowDist */ 3351 /* isrow has same processor distribution as mat */ 3352 if (call == MAT_INITIAL_MATRIX) { 3353 PetscBool sorted; 3354 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3355 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3356 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3357 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3358 3359 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3360 if (sorted) { 3361 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3362 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3363 PetscFunctionReturn(0); 3364 } 3365 } else { /* call == MAT_REUSE_MATRIX */ 3366 IS iscol_sub; 3367 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3368 if (iscol_sub) { 3369 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3370 PetscFunctionReturn(0); 3371 } 3372 } 3373 } 3374 } 3375 3376 /* General case: iscol -> iscol_local which has global size of iscol */ 3377 if (call == MAT_REUSE_MATRIX) { 3378 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3379 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3380 } else { 3381 if (!iscol_local) { 3382 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3383 } 3384 } 3385 3386 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3387 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3388 3389 if (call == MAT_INITIAL_MATRIX) { 3390 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3391 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3392 } 3393 PetscFunctionReturn(0); 3394 } 3395 3396 /*@C 3397 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3398 and "off-diagonal" part of the matrix in CSR format. 3399 3400 Collective on MPI_Comm 3401 3402 Input Parameters: 3403 + comm - MPI communicator 3404 . 
A - "diagonal" portion of matrix 3405 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3406 - garray - global index of B columns 3407 3408 Output Parameter: 3409 . mat - the matrix, with input A as its local diagonal matrix 3410 Level: advanced 3411 3412 Notes: 3413 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3414 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3415 3416 .seealso: MatCreateMPIAIJWithSplitArrays() 3417 @*/ 3418 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3419 { 3420 PetscErrorCode ierr; 3421 Mat_MPIAIJ *maij; 3422 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3423 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3424 PetscScalar *oa=b->a; 3425 Mat Bnew; 3426 PetscInt m,n,N; 3427 3428 PetscFunctionBegin; 3429 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3430 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3431 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3432 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3433 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3434 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3435 3436 /* Get global columns of mat */ 3437 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3438 3439 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3440 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3441 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3442 maij = (Mat_MPIAIJ*)(*mat)->data; 3443 3444 (*mat)->preallocated = PETSC_TRUE; 3445 3446 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3447 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3448 3449 /* Set A as diagonal portion of *mat */ 3450 maij->A = A; 3451 3452 nz = oi[m]; 3453 for (i=0; i<nz; i++) { 3454 col = oj[i]; 3455 oj[i] = garray[col]; 3456 } 3457 3458 /* Set Bnew as off-diagonal portion of *mat */ 3459 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3460 bnew = (Mat_SeqAIJ*)Bnew->data; 3461 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3462 maij->B = Bnew; 3463 3464 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3465 3466 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3467 b->free_a = PETSC_FALSE; 3468 b->free_ij = PETSC_FALSE; 3469 ierr = MatDestroy(&B);CHKERRQ(ierr); 3470 3471 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3472 bnew->free_a = PETSC_TRUE; 3473 bnew->free_ij = PETSC_TRUE; 3474 3475 /* condense columns of maij->B */ 3476 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3477 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3478 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3479 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3480 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3481 PetscFunctionReturn(0); 3482 } 3483 3484 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3485 
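/*
   Usage sketch for MatCreateMPIAIJWithSeqAIJ() above (an illustration only, not part of the original source):
   the names Adiag, Boff, garray, dnnz, onnz, m, n, and Bncols are assumptions made for this example.
   Adiag is the local "diagonal" block, Boff is the local "off-diagonal" block whose column indices
   0..Bncols-1 index into garray[], and garray[k] gives the global column of Boff column k.

     Mat Adiag,Boff,C;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,0,dnnz,&Adiag);CHKERRQ(ierr);
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,Bncols,0,onnz,&Boff);CHKERRQ(ierr);
     ... fill both with MatSetValues() and assemble them ...
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Adiag,Boff,garray,&C);CHKERRQ(ierr);
     ... Adiag and Boff now belong to C; the caller must not use or destroy them ...
*/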
3486 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3487 { 3488 PetscErrorCode ierr; 3489 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3490 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3491 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3492 Mat M,Msub,B=a->B; 3493 MatScalar *aa; 3494 Mat_SeqAIJ *aij; 3495 PetscInt *garray = a->garray,*colsub,Ncols; 3496 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3497 IS iscol_sub,iscmap; 3498 const PetscInt *is_idx,*cmap; 3499 PetscBool allcolumns=PETSC_FALSE; 3500 MPI_Comm comm; 3501 3502 PetscFunctionBegin; 3503 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3504 3505 if (call == MAT_REUSE_MATRIX) { 3506 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3507 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3508 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3509 3510 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3511 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3512 3513 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3514 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3515 3516 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3517 3518 } else { /* call == MAT_INITIAL_MATRIX) */ 3519 PetscBool flg; 3520 3521 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3522 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3523 3524 /* (1) iscol -> nonscalable iscol_local */ 3525 /* Check for special case: each processor gets entire matrix columns */ 3526 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3527 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3528 if (allcolumns) { 3529 iscol_sub = iscol_local; 3530 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3531 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3532 3533 } else { 3534 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3535 PetscInt *idx,*cmap1,k; 3536 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3537 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3538 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3539 count = 0; 3540 k = 0; 3541 for (i=0; i<Ncols; i++) { 3542 j = is_idx[i]; 3543 if (j >= cstart && j < cend) { 3544 /* diagonal part of mat */ 3545 idx[count] = j; 3546 cmap1[count++] = i; /* column index in submat */ 3547 } else if (Bn) { 3548 /* off-diagonal part of mat */ 3549 if (j == garray[k]) { 3550 idx[count] = j; 3551 cmap1[count++] = i; /* column index in submat */ 3552 } else if (j > garray[k]) { 3553 while (j > garray[k] && k < Bn-1) k++; 3554 if (j == garray[k]) { 3555 idx[count] = j; 3556 cmap1[count++] = i; /* column index in submat */ 3557 } 3558 } 3559 } 3560 } 3561 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3562 3563 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3564 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3565 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3566 3567 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3568 } 3569 3570 /* (3) Create sequential Msub */ 3571 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3572 } 3573 3574 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3575 aij = (Mat_SeqAIJ*)(Msub)->data; 3576 ii = aij->i; 3577 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3578 3579 /* 3580 m - number of local rows 3581 Ncols - number of columns (same on all processors) 3582 rstart - first row in new global matrix generated 3583 */ 3584 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3585 3586 if (call == MAT_INITIAL_MATRIX) { 3587 /* (4) Create parallel newmat */ 3588 PetscMPIInt rank,size; 3589 PetscInt csize; 3590 3591 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3592 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3593 3594 /* 3595 Determine the number of non-zeros in the diagonal and off-diagonal 3596 portions of the matrix in order to do correct preallocation 3597 */ 3598 3599 /* first get start and end of "diagonal" columns */ 3600 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3601 if (csize == PETSC_DECIDE) { 3602 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3603 if (mglobal == Ncols) { /* square matrix */ 3604 nlocal = m; 3605 } else { 3606 nlocal = Ncols/size + ((Ncols % size) > rank); 3607 } 3608 } else { 3609 nlocal = csize; 3610 } 3611 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3612 rstart = rend - nlocal; 3613 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3614 3615 /* next, compute all the lengths */ 3616 jj = aij->j; 3617 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3618 olens = dlens + m; 3619 for (i=0; i<m; i++) { 3620 jend = ii[i+1] - ii[i]; 3621 olen = 0; 3622 dlen = 0; 3623 for (j=0; j<jend; j++) { 3624 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3625 else dlen++; 3626 jj++; 3627 } 3628 olens[i] = olen; 3629 dlens[i] = dlen; 3630 } 3631 3632 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3633 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3634 3635 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3636 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3637 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3638 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3639 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3640 ierr = PetscFree(dlens);CHKERRQ(ierr); 3641 3642 } else { /* call == MAT_REUSE_MATRIX */ 3643 M = *newmat; 3644 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3645 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3646 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3647 /* 3648 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3649 rather than the slower MatSetValues(). 3650 */ 3651 M->was_assembled = PETSC_TRUE; 3652 M->assembled = PETSC_FALSE; 3653 } 3654 3655 /* (5) Set values of Msub to *newmat */ 3656 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3657 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3658 3659 jj = aij->j; 3660 aa = aij->a; 3661 for (i=0; i<m; i++) { 3662 row = rstart + i; 3663 nz = ii[i+1] - ii[i]; 3664 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3665 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3666 jj += nz; aa += nz; 3667 } 3668 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3669 3670 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3671 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3672 3673 ierr = PetscFree(colsub);CHKERRQ(ierr); 3674 3675 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3676 if (call == MAT_INITIAL_MATRIX) { 3677 *newmat = M; 3678 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3679 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3680 3681 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3682 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3683 3684 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3685 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3686 3687 if (iscol_local) { 3688 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3689 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3690 } 3691 } 3692 PetscFunctionReturn(0); 3693 } 3694 3695 /* 3696 Not great since it makes two copies of the submatrix, first an SeqAIJ 3697 in local and then by concatenating the local matrices the end result. 3698 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3699 3700 Note: This requires a sequential iscol with all indices. 
3701 */ 3702 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3703 { 3704 PetscErrorCode ierr; 3705 PetscMPIInt rank,size; 3706 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3707 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3708 Mat M,Mreuse; 3709 MatScalar *aa,*vwork; 3710 MPI_Comm comm; 3711 Mat_SeqAIJ *aij; 3712 PetscBool colflag,allcolumns=PETSC_FALSE; 3713 3714 PetscFunctionBegin; 3715 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3716 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3717 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3718 3719 /* Check for special case: each processor gets entire matrix columns */ 3720 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3721 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3722 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3723 3724 if (call == MAT_REUSE_MATRIX) { 3725 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3726 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3727 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3728 } else { 3729 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3730 } 3731 3732 /* 3733 m - number of local rows 3734 n - number of columns (same on all processors) 3735 rstart - first row in new global matrix generated 3736 */ 3737 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3738 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3739 if (call == MAT_INITIAL_MATRIX) { 3740 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3741 ii = aij->i; 3742 jj = aij->j; 3743 3744 /* 3745 Determine the number of non-zeros in the diagonal and off-diagonal 3746 portions of the matrix in order to do correct preallocation 3747 */ 3748 3749 /* first get start and end of "diagonal" columns */ 3750 if (csize == PETSC_DECIDE) { 3751 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3752 if (mglobal == n) { /* square matrix */ 3753 nlocal = m; 3754 } else { 3755 nlocal = n/size + ((n % size) > rank); 3756 } 3757 } else { 3758 nlocal = csize; 3759 } 3760 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3761 rstart = rend - nlocal; 3762 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3763 3764 /* next, compute all the lengths */ 3765 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3766 olens = dlens + m; 3767 for (i=0; i<m; i++) { 3768 jend = ii[i+1] - ii[i]; 3769 olen = 0; 3770 dlen = 0; 3771 for (j=0; j<jend; j++) { 3772 if (*jj < rstart || *jj >= rend) olen++; 3773 else dlen++; 3774 jj++; 3775 } 3776 olens[i] = olen; 3777 dlens[i] = dlen; 3778 } 3779 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3780 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3781 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3782 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3783 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3784 ierr = PetscFree(dlens);CHKERRQ(ierr); 3785 } else { 3786 PetscInt ml,nl; 3787 3788 M = *newmat; 3789 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3790 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3791 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3792 /* 3793 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3794 rather than the slower MatSetValues(). 3795 */ 3796 M->was_assembled = PETSC_TRUE; 3797 M->assembled = PETSC_FALSE; 3798 } 3799 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3800 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3801 ii = aij->i; 3802 jj = aij->j; 3803 aa = aij->a; 3804 for (i=0; i<m; i++) { 3805 row = rstart + i; 3806 nz = ii[i+1] - ii[i]; 3807 cwork = jj; jj += nz; 3808 vwork = aa; aa += nz; 3809 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3810 } 3811 3812 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3813 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3814 *newmat = M; 3815 3816 /* save submatrix used in processor for next request */ 3817 if (call == MAT_INITIAL_MATRIX) { 3818 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3819 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3820 } 3821 PetscFunctionReturn(0); 3822 } 3823 3824 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3825 { 3826 PetscInt m,cstart, cend,j,nnz,i,d; 3827 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3828 const PetscInt *JJ; 3829 PetscScalar *values; 3830 PetscErrorCode ierr; 3831 PetscBool nooffprocentries; 3832 3833 PetscFunctionBegin; 3834 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3835 3836 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3837 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3838 m = B->rmap->n; 3839 cstart = B->cmap->rstart; 3840 cend = B->cmap->rend; 3841 rstart = B->rmap->rstart; 3842 3843 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3844 3845 #if defined(PETSC_USE_DEBUG) 3846 for (i=0; i<m; i++) { 3847 nnz = Ii[i+1]- Ii[i]; 3848 JJ = J + Ii[i]; 3849 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3850 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3851 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3852 } 3853 #endif 3854 3855 for (i=0; i<m; i++) { 3856 nnz = Ii[i+1]- Ii[i]; 3857 JJ = J + Ii[i]; 3858 nnz_max = PetscMax(nnz_max,nnz); 3859 d = 0; 3860 for (j=0; j<nnz; j++) { 3861 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3862 } 3863 d_nnz[i] = d; 3864 o_nnz[i] = nnz - d; 3865 } 3866 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3867 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3868 3869 if (v) values = (PetscScalar*)v; 3870 else { 3871 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3872 } 3873 3874 for (i=0; i<m; i++) { 3875 ii = i + rstart; 3876 nnz = Ii[i+1]- Ii[i]; 3877 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3878 } 3879 nooffprocentries = B->nooffprocentries; 3880 B->nooffprocentries = PETSC_TRUE; 3881 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3882 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3883 B->nooffprocentries = nooffprocentries; 3884 3885 if (!v) { 3886 ierr = PetscFree(values);CHKERRQ(ierr); 3887 } 3888 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3889 PetscFunctionReturn(0); 3890 } 3891 3892 /*@ 3893 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3894 (the default parallel PETSc format). 3895 3896 Collective on MPI_Comm 3897 3898 Input Parameters: 3899 + B - the matrix 3900 . i - the indices into j for the start of each local row (starts with zero) 3901 . j - the column indices for each local row (starts with zero) 3902 - v - optional values in the matrix 3903 3904 Level: developer 3905 3906 Notes: 3907 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3908 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3909 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3910 3911 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3912 3913 The format which is used for the sparse matrix input, is equivalent to a 3914 row-major ordering.. i.e for the following matrix, the input data expected is 3915 as shown 3916 3917 $ 1 0 0 3918 $ 2 0 3 P0 3919 $ ------- 3920 $ 4 5 6 P1 3921 $ 3922 $ Process0 [P0]: rows_owned=[0,1] 3923 $ i = {0,1,3} [size = nrow+1 = 2+1] 3924 $ j = {0,0,2} [size = 3] 3925 $ v = {1,2,3} [size = 3] 3926 $ 3927 $ Process1 [P1]: rows_owned=[2] 3928 $ i = {0,3} [size = nrow+1 = 1+1] 3929 $ j = {0,1,2} [size = 3] 3930 $ v = {4,5,6} [size = 3] 3931 3932 .keywords: matrix, aij, compressed row, sparse, parallel 3933 3934 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3935 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3936 @*/ 3937 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3938 { 3939 PetscErrorCode ierr; 3940 3941 PetscFunctionBegin; 3942 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3943 PetscFunctionReturn(0); 3944 } 3945 3946 /*@C 3947 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3948 (the default parallel PETSc format). For good matrix assembly performance 3949 the user should preallocate the matrix storage by setting the parameters 3950 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3951 performance can be increased by more than a factor of 50. 3952 3953 Collective on MPI_Comm 3954 3955 Input Parameters: 3956 + B - the matrix 3957 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3958 (same value is used for all local rows) 3959 . d_nnz - array containing the number of nonzeros in the various rows of the 3960 DIAGONAL portion of the local submatrix (possibly different for each row) 3961 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3962 The size of this array is equal to the number of local rows, i.e 'm'. 
3963 For matrices that will be factored, you must leave room for (and set) 3964 the diagonal entry even if it is zero. 3965 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3966 submatrix (same value is used for all local rows). 3967 - o_nnz - array containing the number of nonzeros in the various rows of the 3968 OFF-DIAGONAL portion of the local submatrix (possibly different for 3969 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3970 structure. The size of this array is equal to the number 3971 of local rows, i.e. 'm'. 3972 3973 If the *_nnz parameter is given then the *_nz parameter is ignored. 3974 3975 The AIJ format (also called the Yale sparse matrix format or 3976 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3977 storage. The stored row and column indices begin with zero. 3978 See Users-Manual: ch_mat for details. 3979 3980 The parallel matrix is partitioned such that the first m0 rows belong to 3981 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3982 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 3983 3984 The DIAGONAL portion of the local submatrix of a processor can be defined 3985 as the submatrix which is obtained by extracting the part corresponding to 3986 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3987 first row that belongs to the processor, r2 is the last row belonging to 3988 this processor, and c1-c2 is the range of indices of the local part of a 3989 vector suitable for applying the matrix to. This is an mxn matrix. In the 3990 common case of a square matrix, the row and column ranges are the same and 3991 the DIAGONAL part is also square. The remaining portion of the local 3992 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3993 3994 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3995 3996 You can call MatGetInfo() to get information on how effective the preallocation was; 3997 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3998 You can also run with the option -info and look for messages with the string 3999 malloc in them to see if additional memory allocation was needed. 4000 4001 Example usage: 4002 4003 Consider the following 8x8 matrix with 34 non-zero values that is 4004 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4005 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4006 as follows: 4007 4008 .vb 4009 1 2 0 | 0 3 0 | 0 4 4010 Proc0 0 5 6 | 7 0 0 | 8 0 4011 9 0 10 | 11 0 0 | 12 0 4012 ------------------------------------- 4013 13 0 14 | 15 16 17 | 0 0 4014 Proc1 0 18 0 | 19 20 21 | 0 0 4015 0 0 0 | 22 23 0 | 24 0 4016 ------------------------------------- 4017 Proc2 25 26 27 | 0 0 28 | 29 0 4018 30 0 0 | 31 32 33 | 0 34 4019 .ve 4020 4021 This can be represented as a collection of submatrices as: 4022 4023 .vb 4024 A B C 4025 D E F 4026 G H I 4027 .ve 4028 4029 Where the submatrices A,B,C are owned by proc0, D,E,F are 4030 owned by proc1, G,H,I are owned by proc2. 4031 4032 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4033 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4034 The 'M','N' parameters are 8,8, and have the same values on all procs. 4035 4036 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4037 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4038 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4039 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4040 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 4041 matrix and [DF] as another SeqAIJ matrix. 4042 4043 When d_nz, o_nz parameters are specified, d_nz storage elements are 4044 allocated for every row of the local diagonal submatrix, and o_nz 4045 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4046 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4047 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4048 In this case, the values of d_nz,o_nz are: 4049 .vb 4050 proc0 : dnz = 2, o_nz = 2 4051 proc1 : dnz = 3, o_nz = 2 4052 proc2 : dnz = 1, o_nz = 4 4053 .ve 4054 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4055 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4056 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4057 34 values. 4058 4059 When d_nnz, o_nnz parameters are specified, the storage is specified 4060 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4061 In the above case the values for d_nnz,o_nnz are: 4062 .vb 4063 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4064 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4065 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4066 .ve 4067 Here the space allocated is the sum of all the above values, i.e., 34, and 4068 hence the preallocation is perfect. 4069 4070 Level: intermediate 4071 4072 .keywords: matrix, aij, compressed row, sparse, parallel 4073 4074 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4075 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4076 @*/ 4077 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4078 { 4079 PetscErrorCode ierr; 4080 4081 PetscFunctionBegin; 4082 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4083 PetscValidType(B,1); 4084 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4085 PetscFunctionReturn(0); 4086 } 4087 4088 /*@ 4089 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4090 CSR format. 4091 4092 Collective on MPI_Comm 4093 4094 Input Parameters: 4095 + comm - MPI communicator 4096 . m - number of local rows (Cannot be PETSC_DECIDE) 4097 . n - This value should be the same as the local size used in creating the 4098 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4099 calculated if N is given) For square matrices n is almost always m. 4100 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4101 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4102 . i - row indices 4103 . j - column indices 4104 - a - matrix values 4105 4106 Output Parameter: 4107 . mat - the matrix 4108 4109 Level: intermediate 4110 4111 Notes: 4112 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4113 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4114 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4115 4116 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
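   A minimal call sketch (the names m, n, i, j, v, and mat are illustrative and match the per-process example below; each process passes only the CSR arrays for its own rows):

$      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,v,&mat);CHKERRQ(ierr);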
4117 4118 The format which is used for the sparse matrix input, is equivalent to a 4119 row-major ordering.. i.e for the following matrix, the input data expected is 4120 as shown 4121 4122 $ 1 0 0 4123 $ 2 0 3 P0 4124 $ ------- 4125 $ 4 5 6 P1 4126 $ 4127 $ Process0 [P0]: rows_owned=[0,1] 4128 $ i = {0,1,3} [size = nrow+1 = 2+1] 4129 $ j = {0,0,2} [size = 3] 4130 $ v = {1,2,3} [size = 3] 4131 $ 4132 $ Process1 [P1]: rows_owned=[2] 4133 $ i = {0,3} [size = nrow+1 = 1+1] 4134 $ j = {0,1,2} [size = 3] 4135 $ v = {4,5,6} [size = 3] 4136 4137 .keywords: matrix, aij, compressed row, sparse, parallel 4138 4139 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4140 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4141 @*/ 4142 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4143 { 4144 PetscErrorCode ierr; 4145 4146 PetscFunctionBegin; 4147 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4148 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4149 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4150 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4151 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4152 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4153 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4154 PetscFunctionReturn(0); 4155 } 4156 4157 /*@C 4158 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4159 (the default parallel PETSc format). For good matrix assembly performance 4160 the user should preallocate the matrix storage by setting the parameters 4161 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4162 performance can be increased by more than a factor of 50. 4163 4164 Collective on MPI_Comm 4165 4166 Input Parameters: 4167 + comm - MPI communicator 4168 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4169 This value should be the same as the local size used in creating the 4170 y vector for the matrix-vector product y = Ax. 4171 . n - This value should be the same as the local size used in creating the 4172 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4173 calculated if N is given) For square matrices n is almost always m. 4174 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4175 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4176 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4177 (same value is used for all local rows) 4178 . d_nnz - array containing the number of nonzeros in the various rows of the 4179 DIAGONAL portion of the local submatrix (possibly different for each row) 4180 or NULL, if d_nz is used to specify the nonzero structure. 4181 The size of this array is equal to the number of local rows, i.e 'm'. 4182 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4183 submatrix (same value is used for all local rows). 4184 - o_nnz - array containing the number of nonzeros in the various rows of the 4185 OFF-DIAGONAL portion of the local submatrix (possibly different for 4186 each row) or NULL, if o_nz is used to specify the nonzero 4187 structure. 
The size of this array is equal to the number 4188 of local rows, i.e. 'm'. 4189 4190 Output Parameter: 4191 . A - the matrix 4192 4193 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4194 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4195 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4196 4197 Notes: 4198 If the *_nnz parameter is given then the *_nz parameter is ignored. 4199 4200 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4201 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4202 storage requirements for this matrix. 4203 4204 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4205 processor then it must be used on all processors that share the object for 4206 that argument. 4207 4208 The user MUST specify either the local or global matrix dimensions 4209 (possibly both). 4210 4211 The parallel matrix is partitioned across processors such that the 4212 first m0 rows belong to process 0, the next m1 rows belong to 4213 process 1, the next m2 rows belong to process 2, etc., where 4214 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores 4215 values corresponding to an [m x N] submatrix. 4216 4217 The columns are logically partitioned with the n0 columns belonging 4218 to the 0th partition, the next n1 columns belonging to the next 4219 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4220 4221 The DIAGONAL portion of the local submatrix on any given processor 4222 is the submatrix corresponding to the rows and columns m,n 4223 corresponding to the given processor, i.e., the diagonal matrix on 4224 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4225 etc. The remaining portion of the local submatrix [m x (N-n)] 4226 constitutes the OFF-DIAGONAL portion. The example below better 4227 illustrates this concept. 4228 4229 For a square global matrix we define each processor's diagonal portion 4230 to be its local rows and the corresponding columns (a square submatrix); 4231 each processor's off-diagonal portion encompasses the remainder of the 4232 local matrix (a rectangular submatrix). 4233 4234 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4235 4236 When calling this routine with a single process communicator, a matrix of 4237 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4238 type of communicator, use the construction mechanism 4239 .vb 4240 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4241 .ve 4242 4243 $ MatCreate(...,&A); 4244 $ MatSetType(A,MATMPIAIJ); 4245 $ MatSetSizes(A, m,n,M,N); 4246 $ MatMPIAIJSetPreallocation(A,...); 4247 4248 By default, this format uses inodes (identical nodes) when possible. 4249 We search for consecutive rows with the same nonzero structure, thereby 4250 reusing matrix information to achieve increased efficiency. 4251 4252 Options Database Keys: 4253 + -mat_no_inode - Do not use inodes 4254 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4255 4256 4257 4258 Example usage: 4259 4260 Consider the following 8x8 matrix with 34 non-zero values that is 4261 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4262 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 4263 as follows: 4264 4265 .vb 4266 1 2 0 | 0 3 0 | 0 4 4267 Proc0 0 5 6 | 7 0 0 | 8 0 4268 9 0 10 | 11 0 0 | 12 0 4269 ------------------------------------- 4270 13 0 14 | 15 16 17 | 0 0 4271 Proc1 0 18 0 | 19 20 21 | 0 0 4272 0 0 0 | 22 23 0 | 24 0 4273 ------------------------------------- 4274 Proc2 25 26 27 | 0 0 28 | 29 0 4275 30 0 0 | 31 32 33 | 0 34 4276 .ve 4277 4278 This can be represented as a collection of submatrices as: 4279 4280 .vb 4281 A B C 4282 D E F 4283 G H I 4284 .ve 4285 4286 Where the submatrices A,B,C are owned by proc0, D,E,F are 4287 owned by proc1, G,H,I are owned by proc2. 4288 4289 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4290 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4291 The 'M','N' parameters are 8,8, and have the same values on all procs. 4292 4293 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4294 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4295 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4296 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4297 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 4298 matrix and [DF] as another SeqAIJ matrix. 4299 4300 When d_nz, o_nz parameters are specified, d_nz storage elements are 4301 allocated for every row of the local diagonal submatrix, and o_nz 4302 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4303 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4304 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4305 In this case, the values of d_nz,o_nz are: 4306 .vb 4307 proc0 : dnz = 2, o_nz = 2 4308 proc1 : dnz = 3, o_nz = 2 4309 proc2 : dnz = 1, o_nz = 4 4310 .ve 4311 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4312 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4313 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4314 34 values. 4315 4316 When d_nnz, o_nnz parameters are specified, the storage is specified 4317 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4318 In the above case the values for d_nnz,o_nnz are: 4319 .vb 4320 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4321 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4322 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4323 .ve 4324 Here the space allocated is the sum of all the above values, i.e., 34, and 4325 hence the preallocation is perfect.
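   As a sketch (error handling and surrounding declarations omitted; each process passes its own local sizes and counts, only proc1's values from the layout above are shown), proc1 could make the call as:

.vb
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve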
4326 4327 Level: intermediate 4328 4329 .keywords: matrix, aij, compressed row, sparse, parallel 4330 4331 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4332 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4333 @*/ 4334 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4335 { 4336 PetscErrorCode ierr; 4337 PetscMPIInt size; 4338 4339 PetscFunctionBegin; 4340 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4341 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4342 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4343 if (size > 1) { 4344 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4345 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4346 } else { 4347 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4348 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4349 } 4350 PetscFunctionReturn(0); 4351 } 4352 4353 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4354 { 4355 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4356 PetscBool flg; 4357 PetscErrorCode ierr; 4358 4359 PetscFunctionBegin; 4360 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4361 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4362 if (Ad) *Ad = a->A; 4363 if (Ao) *Ao = a->B; 4364 if (colmap) *colmap = a->garray; 4365 PetscFunctionReturn(0); 4366 } 4367 4368 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4369 { 4370 PetscErrorCode ierr; 4371 PetscInt m,N,i,rstart,nnz,Ii; 4372 PetscInt *indx; 4373 PetscScalar *values; 4374 4375 PetscFunctionBegin; 4376 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4377 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4378 PetscInt *dnz,*onz,sum,bs,cbs; 4379 4380 if (n == PETSC_DECIDE) { 4381 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4382 } 4383 /* Check sum(n) = N */ 4384 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4385 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4386 4387 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4388 rstart -= m; 4389 4390 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4391 for (i=0; i<m; i++) { 4392 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4393 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4394 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4395 } 4396 4397 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4398 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4399 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4400 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4401 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4402 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4403 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4404 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4405 } 4406 4407 /* numeric phase */ 4408 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4409 for (i=0; i<m; i++) { 4410 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4411 Ii = i + rstart; 4412 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
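    /* hand the row back to the sequential input matrix before moving on to the next local row */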
4413 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4414 } 4415 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4416 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4417 PetscFunctionReturn(0); 4418 } 4419 4420 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4421 { 4422 PetscErrorCode ierr; 4423 PetscMPIInt rank; 4424 PetscInt m,N,i,rstart,nnz; 4425 size_t len; 4426 const PetscInt *indx; 4427 PetscViewer out; 4428 char *name; 4429 Mat B; 4430 const PetscScalar *values; 4431 4432 PetscFunctionBegin; 4433 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4434 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4435 /* Should this be the type of the diagonal block of A? */ 4436 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4437 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4438 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4439 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4440 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4441 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4442 for (i=0; i<m; i++) { 4443 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4444 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4445 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4446 } 4447 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4448 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4449 4450 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4451 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4452 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4453 sprintf(name,"%s.%d",outfile,rank); 4454 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4455 ierr = PetscFree(name);CHKERRQ(ierr); 4456 ierr = MatView(B,out);CHKERRQ(ierr); 4457 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4458 ierr = MatDestroy(&B);CHKERRQ(ierr); 4459 PetscFunctionReturn(0); 4460 } 4461 4462 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4463 { 4464 PetscErrorCode ierr; 4465 Mat_Merge_SeqsToMPI *merge; 4466 PetscContainer container; 4467 4468 PetscFunctionBegin; 4469 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4470 if (container) { 4471 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4472 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4473 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4474 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4475 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4476 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4477 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4478 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4479 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4483 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4484 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4485 ierr = PetscFree(merge);CHKERRQ(ierr); 4486 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4487 } 4488 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4489 PetscFunctionReturn(0); 4490 } 4491 4492 #include <../src/mat/utils/freespace.h> 4493 #include <petscbt.h> 4494 4495 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4496 { 4497 PetscErrorCode ierr; 4498 MPI_Comm comm; 4499 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4500 PetscMPIInt 
size,rank,taga,*len_s; 4501 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4502 PetscInt proc,m; 4503 PetscInt **buf_ri,**buf_rj; 4504 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4505 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4506 MPI_Request *s_waits,*r_waits; 4507 MPI_Status *status; 4508 MatScalar *aa=a->a; 4509 MatScalar **abuf_r,*ba_i; 4510 Mat_Merge_SeqsToMPI *merge; 4511 PetscContainer container; 4512 4513 PetscFunctionBegin; 4514 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4515 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4516 4517 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4518 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4519 4520 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4521 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4522 4523 bi = merge->bi; 4524 bj = merge->bj; 4525 buf_ri = merge->buf_ri; 4526 buf_rj = merge->buf_rj; 4527 4528 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4529 owners = merge->rowmap->range; 4530 len_s = merge->len_s; 4531 4532 /* send and recv matrix values */ 4533 /*-----------------------------*/ 4534 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4535 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4536 4537 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4538 for (proc=0,k=0; proc<size; proc++) { 4539 if (!len_s[proc]) continue; 4540 i = owners[proc]; 4541 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4542 k++; 4543 } 4544 4545 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4546 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4547 ierr = PetscFree(status);CHKERRQ(ierr); 4548 4549 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4550 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4551 4552 /* insert mat values of mpimat */ 4553 /*----------------------------*/ 4554 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4555 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4556 4557 for (k=0; k<merge->nrecv; k++) { 4558 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4559 nrows = *(buf_ri_k[k]); 4560 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4561 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4562 } 4563 4564 /* set values of ba */ 4565 m = merge->rowmap->n; 4566 for (i=0; i<m; i++) { 4567 arow = owners[rank] + i; 4568 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4569 bnzi = bi[i+1] - bi[i]; 4570 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4571 4572 /* add local non-zero vals of this proc's seqmat into ba */ 4573 anzi = ai[arow+1] - ai[arow]; 4574 aj = a->j + ai[arow]; 4575 aa = a->a + ai[arow]; 4576 nextaj = 0; 4577 for (j=0; nextaj<anzi; j++) { 4578 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4579 ba_i[j] += aa[nextaj++]; 4580 } 4581 } 4582 4583 /* add received vals into ba */ 4584 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4585 /* i-th row */ 4586 if (i == *nextrow[k]) { 4587 anzi = *(nextai[k]+1) - *nextai[k]; 4588 aj = buf_rj[k] + *(nextai[k]); 4589 aa = abuf_r[k] + *(nextai[k]); 4590 nextaj = 0; 4591 for (j=0; nextaj<anzi; j++) { 4592 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4593 ba_i[j] += aa[nextaj++]; 4594 } 4595 } 4596 nextrow[k]++; nextai[k]++; 4597 } 4598 } 4599 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4600 } 4601 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4602 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4603 4604 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4605 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4606 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4607 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4608 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4609 PetscFunctionReturn(0); 4610 } 4611 4612 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4613 { 4614 PetscErrorCode ierr; 4615 Mat B_mpi; 4616 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4617 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4618 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4619 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4620 PetscInt len,proc,*dnz,*onz,bs,cbs; 4621 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4622 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4623 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4624 MPI_Status *status; 4625 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4626 PetscBT lnkbt; 4627 Mat_Merge_SeqsToMPI *merge; 4628 PetscContainer container; 4629 4630 PetscFunctionBegin; 4631 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4632 4633 /* make sure it is a PETSc comm */ 4634 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4635 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4636 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4637 4638 ierr = PetscNew(&merge);CHKERRQ(ierr); 4639 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4640 4641 /* determine row ownership */ 4642 /*---------------------------------------------------------*/ 4643 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4644 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4645 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4646 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4647 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4648 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4649 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4650 4651 m = merge->rowmap->n; 4652 owners = merge->rowmap->range; 4653 4654 /* determine the number of messages to send, their lengths */ 4655 /*---------------------------------------------------------*/ 4656 len_s = merge->len_s; 4657 4658 len = 0; /* length of buf_si[] */ 4659 merge->nsend = 0; 4660 for (proc=0; proc<size; proc++) { 4661 len_si[proc] = 0; 4662 if (proc == rank) { 4663 len_s[proc] = 0; 4664 } else { 4665 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4666 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4667 } 4668 if (len_s[proc]) { 4669 merge->nsend++; 4670 nrows = 0; 4671 for (i=owners[proc]; i<owners[proc+1]; i++) { 4672 if (ai[i+1] > ai[i]) nrows++; 4673 } 4674 len_si[proc] = 2*(nrows+1); 4675 len += len_si[proc]; 4676 } 4677 } 4678 4679 /* determine the number and length of messages to receive for ij-structure */ 4680 /*-------------------------------------------------------------------------*/ 4681 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4682 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4683 4684 /* post the Irecv of j-structure */ 4685 /*-------------------------------*/ 4686 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4687 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4688 4689 /* post the Isend of j-structure */ 4690 /*--------------------------------*/ 4691 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4692 4693 for (proc=0, k=0; proc<size; proc++) { 4694 if (!len_s[proc]) continue; 4695 i = owners[proc]; 4696 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4697 k++; 4698 } 4699 4700 /* receives and sends of j-structure are complete */ 4701 /*------------------------------------------------*/ 4702 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4703 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4704 4705 /* send and recv i-structure */ 4706 /*---------------------------*/ 4707 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4708 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4709 4710 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4711 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4712 for (proc=0,k=0; proc<size; proc++) { 4713 if (!len_s[proc]) continue; 4714 /* form outgoing message for i-structure: 4715 buf_si[0]: nrows to be sent 4716 [1:nrows]: row index (global) 4717 [nrows+1:2*nrows+1]: i-structure index 4718 */ 4719 /*-------------------------------------------*/ 4720 nrows = len_si[proc]/2 - 1; 4721 buf_si_i = buf_si + nrows+1; 4722 buf_si[0] = nrows; 4723 buf_si_i[0] = 0; 4724 nrows = 0; 4725 for (i=owners[proc]; i<owners[proc+1]; i++) { 4726 anzi = ai[i+1] - ai[i]; 4727 if (anzi) { 4728 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4729 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4730 nrows++; 4731 } 4732 } 4733 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4734 k++; 4735 buf_si += len_si[proc]; 4736 } 4737 4738 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4739 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4740 4741 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4742 for (i=0; i<merge->nrecv; i++) { 4743 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4744 } 4745 4746 ierr = PetscFree(len_si);CHKERRQ(ierr); 4747 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4748 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4749 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4750 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4751 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4752 ierr = PetscFree(status);CHKERRQ(ierr); 4753 4754 /* compute a local seq matrix in each processor */ 4755 /*----------------------------------------------*/ 4756 /* allocate bi array and free space for accumulating nonzero column info */ 4757 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4758 bi[0] = 0; 4759 4760 /* create and initialize a linked list */ 4761 nlnk = N+1; 4762 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4763 4764 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4765 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4766 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4767 4768 current_space = free_space; 4769 4770 /* determine symbolic info for each local row */ 4771 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4772 4773 for (k=0; k<merge->nrecv; k++) { 4774 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4775 nrows = *buf_ri_k[k]; 4776 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4777 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4778 } 4779 4780 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4781 len = 0; 4782 for (i=0; i<m; i++) { 4783 bnzi = 0; 4784 /* add local non-zero cols of this proc's seqmat into lnk */ 4785 arow = owners[rank] + i; 4786 anzi = ai[arow+1] - ai[arow]; 4787 aj = a->j + ai[arow]; 4788 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4789 bnzi += nlnk; 4790 /* add received col data into lnk */ 4791 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4792 if (i == *nextrow[k]) { /* i-th row */ 4793 anzi = *(nextai[k]+1) - *nextai[k]; 4794 aj = buf_rj[k] + *nextai[k]; 4795 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4796 bnzi += nlnk; 4797 nextrow[k]++; nextai[k]++; 4798 } 4799 } 4800 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4801 4802 /* if free space is not available, make more free space */ 4803 if (current_space->local_remaining<bnzi) { 4804 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4805 nspacedouble++; 4806 } 4807 /* copy data into free space, then initialize lnk */ 4808 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4809 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4810 4811 current_space->array += bnzi; 4812 current_space->local_used += bnzi; 4813 current_space->local_remaining -= bnzi; 4814 4815 bi[i+1] = bi[i] + bnzi; 4816 } 4817 4818 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4819 4820 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4821 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4822 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4823 4824 /* create symbolic parallel matrix B_mpi */ 4825 /*---------------------------------------*/ 4826 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4827 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4828 if (n==PETSC_DECIDE) { 4829 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4830 } else { 4831 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4832 } 4833 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4834 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4835 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4836 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4837 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4838 4839 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4840 B_mpi->assembled = PETSC_FALSE; 4841 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4842 merge->bi = bi; 4843 merge->bj = bj; 4844 merge->buf_ri = buf_ri; 4845 merge->buf_rj = buf_rj; 4846 merge->coi = NULL; 4847 merge->coj = NULL; 4848 merge->owners_co = NULL; 4849 4850 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4851 4852 /* attach the 
supporting struct to B_mpi for reuse */ 4853 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4854 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4855 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4856 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4857 *mpimat = B_mpi; 4858 4859 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4860 PetscFunctionReturn(0); 4861 } 4862 4863 /*@C 4864 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4865 matrices from each processor 4866 4867 Collective on MPI_Comm 4868 4869 Input Parameters: 4870 + comm - the communicators the parallel matrix will live on 4871 . seqmat - the input sequential matrices 4872 . m - number of local rows (or PETSC_DECIDE) 4873 . n - number of local columns (or PETSC_DECIDE) 4874 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4875 4876 Output Parameter: 4877 . mpimat - the parallel matrix generated 4878 4879 Level: advanced 4880 4881 Notes: 4882 The dimensions of the sequential matrix in each processor MUST be the same. 4883 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4884 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4885 @*/ 4886 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4887 { 4888 PetscErrorCode ierr; 4889 PetscMPIInt size; 4890 4891 PetscFunctionBegin; 4892 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4893 if (size == 1) { 4894 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4895 if (scall == MAT_INITIAL_MATRIX) { 4896 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4897 } else { 4898 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4899 } 4900 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4901 PetscFunctionReturn(0); 4902 } 4903 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4904 if (scall == MAT_INITIAL_MATRIX) { 4905 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4906 } 4907 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4908 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4909 PetscFunctionReturn(0); 4910 } 4911 4912 /*@ 4913 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4914 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4915 with MatGetSize() 4916 4917 Not Collective 4918 4919 Input Parameters: 4920 + A - the matrix 4921 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4922 4923 Output Parameter: 4924 . 
A_loc - the local sequential matrix generated 4925 4926 Level: developer 4927 4928 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4929 4930 @*/ 4931 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4932 { 4933 PetscErrorCode ierr; 4934 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4935 Mat_SeqAIJ *mat,*a,*b; 4936 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4937 MatScalar *aa,*ba,*cam; 4938 PetscScalar *ca; 4939 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4940 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4941 PetscBool match; 4942 MPI_Comm comm; 4943 PetscMPIInt size; 4944 4945 PetscFunctionBegin; 4946 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4947 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4948 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4949 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4950 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4951 4952 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4953 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4954 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4955 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4956 aa = a->a; ba = b->a; 4957 if (scall == MAT_INITIAL_MATRIX) { 4958 if (size == 1) { 4959 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4960 PetscFunctionReturn(0); 4961 } 4962 4963 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4964 ci[0] = 0; 4965 for (i=0; i<am; i++) { 4966 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4967 } 4968 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4969 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4970 k = 0; 4971 for (i=0; i<am; i++) { 4972 ncols_o = bi[i+1] - bi[i]; 4973 ncols_d = ai[i+1] - ai[i]; 4974 /* off-diagonal portion of A */ 4975 for (jo=0; jo<ncols_o; jo++) { 4976 col = cmap[*bj]; 4977 if (col >= cstart) break; 4978 cj[k] = col; bj++; 4979 ca[k++] = *ba++; 4980 } 4981 /* diagonal portion of A */ 4982 for (j=0; j<ncols_d; j++) { 4983 cj[k] = cstart + *aj++; 4984 ca[k++] = *aa++; 4985 } 4986 /* off-diagonal portion of A */ 4987 for (j=jo; j<ncols_o; j++) { 4988 cj[k] = cmap[*bj++]; 4989 ca[k++] = *ba++; 4990 } 4991 } 4992 /* put together the new matrix */ 4993 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4994 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4995 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4996 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4997 mat->free_a = PETSC_TRUE; 4998 mat->free_ij = PETSC_TRUE; 4999 mat->nonew = 0; 5000 } else if (scall == MAT_REUSE_MATRIX) { 5001 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5002 ci = mat->i; cj = mat->j; cam = mat->a; 5003 for (i=0; i<am; i++) { 5004 /* off-diagonal portion of A */ 5005 ncols_o = bi[i+1] - bi[i]; 5006 for (jo=0; jo<ncols_o; jo++) { 5007 col = cmap[*bj]; 5008 if (col >= cstart) break; 5009 *cam++ = *ba++; bj++; 5010 } 5011 /* diagonal portion of A */ 5012 ncols_d = ai[i+1] - ai[i]; 5013 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5014 /* off-diagonal portion of A */ 5015 for (j=jo; j<ncols_o; j++) { 5016 *cam++ = *ba++; bj++; 5017 } 5018 } 5019 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5020 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5021 PetscFunctionReturn(0); 5022 } 5023 5024 /*@C 5025 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5026 5027 Not Collective 5028 5029 Input Parameters: 5030 + A - the matrix 5031 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5032 - row, col - index sets of rows and columns to extract (or NULL) 5033 5034 Output Parameter: 5035 . A_loc - the local sequential matrix generated 5036 5037 Level: developer 5038 5039 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5040 5041 @*/ 5042 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5043 { 5044 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5045 PetscErrorCode ierr; 5046 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5047 IS isrowa,iscola; 5048 Mat *aloc; 5049 PetscBool match; 5050 5051 PetscFunctionBegin; 5052 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5053 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5054 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5055 if (!row) { 5056 start = A->rmap->rstart; end = A->rmap->rend; 5057 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5058 } else { 5059 isrowa = *row; 5060 } 5061 if (!col) { 5062 start = A->cmap->rstart; 5063 cmap = a->garray; 5064 nzA = a->A->cmap->n; 5065 nzB = a->B->cmap->n; 5066 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5067 ncols = 0; 5068 for (i=0; i<nzB; i++) { 5069 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5070 else break; 5071 } 5072 imark = i; 5073 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5074 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5075 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5076 } else { 5077 iscola = *col; 5078 } 5079 if (scall != MAT_INITIAL_MATRIX) { 5080 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5081 aloc[0] = *A_loc; 5082 } 5083 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5084 if (!col) { /* attach global id of condensed columns */ 5085 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5086 } 5087 *A_loc = aloc[0]; 5088 ierr = PetscFree(aloc);CHKERRQ(ierr); 5089 if (!row) { 5090 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5091 } 5092 if (!col) { 5093 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5094 } 5095 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5096 PetscFunctionReturn(0); 5097 } 5098 5099 /*@C 5100 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5101 5102 Collective on Mat 5103 5104 Input Parameters: 5105 + A,B - the matrices in mpiaij format 5106 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5107 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5108 5109 Output Parameter: 5110 + rowb, colb - index sets of rows and columns of B to extract 5111 - B_seq - the sequential matrix generated 5112 5113 Level: developer 5114 5115 @*/ 5116 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5117 { 5118 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5119 PetscErrorCode ierr; 5120 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5121 IS isrowb,iscolb; 5122 Mat *bseq=NULL; 5123 5124 PetscFunctionBegin; 5125 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5126 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5127 } 5128 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5129 5130 if (scall == MAT_INITIAL_MATRIX) { 5131 start = A->cmap->rstart; 5132 cmap = a->garray; 5133 nzA = a->A->cmap->n; 5134 nzB = a->B->cmap->n; 5135 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5136 ncols = 0; 5137 for (i=0; i<nzB; i++) { /* row < local row index */ 5138 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5139 else break; 5140 } 5141 imark = i; 5142 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5143 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5144 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5145 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5146 } else { 5147 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5148 isrowb = *rowb; iscolb = *colb; 5149 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5150 bseq[0] = *B_seq; 5151 } 5152 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5153 *B_seq = bseq[0]; 5154 ierr = PetscFree(bseq);CHKERRQ(ierr); 5155 if (!rowb) { 5156 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5157 } else { 5158 *rowb = isrowb; 5159 } 5160 if (!colb) { 5161 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5162 } else { 5163 *colb = iscolb; 5164 } 5165 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5166 PetscFunctionReturn(0); 5167 } 5168 5169 /* 5170 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5171 of the OFF-DIAGONAL portion of local A 5172 5173 Collective on Mat 5174 5175 Input Parameters: 5176 + A,B - the matrices in mpiaij format 5177 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5178 5179 Output Parameter: 5180 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5181 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5182 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5183 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5184 5185 Level: developer 5186 5187 */ 5188 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5189 { 5190 VecScatter_MPI_General *gen_to,*gen_from; 5191 PetscErrorCode ierr; 5192 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5193 Mat_SeqAIJ *b_oth; 5194 VecScatter ctx; 5195 MPI_Comm comm; 5196 PetscMPIInt *rprocs,*sprocs,tag,rank; 5197 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5198 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5199 PetscScalar *b_otha,*bufa,*bufA,*vals; 5200 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5201 MPI_Request *rwaits = NULL,*swaits = NULL; 5202 MPI_Status *sstatus,rstatus; 5203 PetscMPIInt jj,size; 5204 VecScatterType type; 5205 PetscBool mpi1; 5206 5207 PetscFunctionBegin; 5208 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5209 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5210 5211 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5212 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5213 } 5214 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5215 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5216 5217 if (size == 1) { 5218 startsj_s = NULL; 5219 bufa_ptr = NULL; 5220 *B_oth = NULL; 5221 PetscFunctionReturn(0); 5222 } 5223 5224 ctx = a->Mvctx; 5225 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5226 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5227 if (!mpi1) { 5228 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5229 thus create a->Mvctx_mpi1 */ 5230 if (!a->Mvctx_mpi1) { 5231 a->Mvctx_mpi1_flg = PETSC_TRUE; 5232 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5233 } 5234 ctx = a->Mvctx_mpi1; 5235 } 5236 tag = ((PetscObject)ctx)->tag; 5237 5238 gen_to = (VecScatter_MPI_General*)ctx->todata; 5239 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5240 nrecvs = gen_from->n; 5241 nsends = gen_to->n; 5242 5243 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5244 srow = gen_to->indices; /* local row index to be sent */ 5245 sstarts = gen_to->starts; 5246 sprocs = gen_to->procs; 5247 sstatus = gen_to->sstatus; 5248 sbs = gen_to->bs; 5249 rstarts = gen_from->starts; 5250 rprocs = gen_from->procs; 5251 rbs = gen_from->bs; 5252 5253 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5254 if (scall == MAT_INITIAL_MATRIX) { 5255 /* i-array */ 5256 /*---------*/ 5257 /* post receives */ 5258 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5259 for (i=0; i<nrecvs; i++) { 5260 rowlen = rvalues + rstarts[i]*rbs; 5261 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5262 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5263 } 5264 5265 /* pack the outgoing message */ 5266 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5267 5268 sstartsj[0] = 0; 5269 rstartsj[0] = 0; 5270 len = 0; /* total length of j or a array to be sent */ 5271 k = 0; 5272 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5273 for (i=0; i<nsends; i++) { 5274 rowlen = svalues + sstarts[i]*sbs; 5275 
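      /* rowlen is the slice of the send buffer (svalues) that will hold the length of each row destined for process sprocs[i] */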
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5276 for (j=0; j<nrows; j++) { 5277 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5278 for (l=0; l<sbs; l++) { 5279 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5280 5281 rowlen[j*sbs+l] = ncols; 5282 5283 len += ncols; 5284 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5285 } 5286 k++; 5287 } 5288 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5289 5290 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5291 } 5292 /* recvs and sends of i-array are completed */ 5293 i = nrecvs; 5294 while (i--) { 5295 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5296 } 5297 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5298 ierr = PetscFree(svalues);CHKERRQ(ierr); 5299 5300 /* allocate buffers for sending j and a arrays */ 5301 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5302 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5303 5304 /* create i-array of B_oth */ 5305 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5306 5307 b_othi[0] = 0; 5308 len = 0; /* total length of j or a array to be received */ 5309 k = 0; 5310 for (i=0; i<nrecvs; i++) { 5311 rowlen = rvalues + rstarts[i]*rbs; 5312 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5313 for (j=0; j<nrows; j++) { 5314 b_othi[k+1] = b_othi[k] + rowlen[j]; 5315 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5316 k++; 5317 } 5318 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5319 } 5320 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5321 5322 /* allocate space for j and a arrrays of B_oth */ 5323 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5324 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5325 5326 /* j-array */ 5327 /*---------*/ 5328 /* post receives of j-array */ 5329 for (i=0; i<nrecvs; i++) { 5330 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5331 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5332 } 5333 5334 /* pack the outgoing message j-array */ 5335 k = 0; 5336 for (i=0; i<nsends; i++) { 5337 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5338 bufJ = bufj+sstartsj[i]; 5339 for (j=0; j<nrows; j++) { 5340 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5341 for (ll=0; ll<sbs; ll++) { 5342 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5343 for (l=0; l<ncols; l++) { 5344 *bufJ++ = cols[l]; 5345 } 5346 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5347 } 5348 } 5349 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5350 } 5351 5352 /* recvs and sends of j-array are completed */ 5353 i = nrecvs; 5354 while (i--) { 5355 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5356 } 5357 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5358 } else if (scall == MAT_REUSE_MATRIX) { 5359 sstartsj = *startsj_s; 5360 rstartsj = *startsj_r; 5361 bufa = *bufa_ptr; 5362 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5363 b_otha = b_oth->a; 5364 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5365 5366 /* a-array */ 5367 /*---------*/ 5368 /* post receives of a-array */ 5369 for (i=0; i<nrecvs; i++) { 5370 nrows = rstartsj[i+1]-rstartsj[i]; /* length 
of the msg received */ 5371 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5372 } 5373 5374 /* pack the outgoing message a-array */ 5375 k = 0; 5376 for (i=0; i<nsends; i++) { 5377 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5378 bufA = bufa+sstartsj[i]; 5379 for (j=0; j<nrows; j++) { 5380 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5381 for (ll=0; ll<sbs; ll++) { 5382 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5383 for (l=0; l<ncols; l++) { 5384 *bufA++ = vals[l]; 5385 } 5386 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5387 } 5388 } 5389 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5390 } 5391 /* recvs and sends of a-array are completed */ 5392 i = nrecvs; 5393 while (i--) { 5394 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5395 } 5396 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5397 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5398 5399 if (scall == MAT_INITIAL_MATRIX) { 5400 /* put together the new matrix */ 5401 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5402 5403 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5404 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5405 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5406 b_oth->free_a = PETSC_TRUE; 5407 b_oth->free_ij = PETSC_TRUE; 5408 b_oth->nonew = 0; 5409 5410 ierr = PetscFree(bufj);CHKERRQ(ierr); 5411 if (!startsj_s || !bufa_ptr) { 5412 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5413 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5414 } else { 5415 *startsj_s = sstartsj; 5416 *startsj_r = rstartsj; 5417 *bufa_ptr = bufa; 5418 } 5419 } 5420 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5421 PetscFunctionReturn(0); 5422 } 5423 5424 /*@C 5425 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5426 5427 Not Collective 5428 5429 Input Parameters: 5430 . A - The matrix in mpiaij format 5431 5432 Output Parameter: 5433 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5434 . 
colmap - A map from global column index to local index into lvec 5435 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5436 5437 Level: developer 5438 5439 @*/ 5440 #if defined(PETSC_USE_CTABLE) 5441 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5442 #else 5443 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5444 #endif 5445 { 5446 Mat_MPIAIJ *a; 5447 5448 PetscFunctionBegin; 5449 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5450 PetscValidPointer(lvec, 2); 5451 PetscValidPointer(colmap, 3); 5452 PetscValidPointer(multScatter, 4); 5453 a = (Mat_MPIAIJ*) A->data; 5454 if (lvec) *lvec = a->lvec; 5455 if (colmap) *colmap = a->colmap; 5456 if (multScatter) *multScatter = a->Mvctx; 5457 PetscFunctionReturn(0); 5458 } 5459 5460 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5461 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5462 #if defined(PETSC_HAVE_MKL_SPARSE) 5463 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5464 #endif 5465 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5466 #if defined(PETSC_HAVE_ELEMENTAL) 5467 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5468 #endif 5469 #if defined(PETSC_HAVE_HYPRE) 5470 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5471 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5472 #endif 5473 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5474 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5475 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5476 5477 /* 5478 Computes (B'*A')' since computing B*A directly is untenable 5479 5480 n p p 5481 ( ) ( ) ( ) 5482 m ( A ) * n ( B ) = m ( C ) 5483 ( ) ( ) ( ) 5484 5485 */ 5486 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5487 { 5488 PetscErrorCode ierr; 5489 Mat At,Bt,Ct; 5490 5491 PetscFunctionBegin; 5492 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5493 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5494 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5495 ierr = MatDestroy(&At);CHKERRQ(ierr); 5496 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5497 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5498 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5499 PetscFunctionReturn(0); 5500 } 5501 5502 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5503 { 5504 PetscErrorCode ierr; 5505 PetscInt m=A->rmap->n,n=B->cmap->n; 5506 Mat Cmat; 5507 5508 PetscFunctionBegin; 5509 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5510 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5511 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5512 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5513 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5514 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5515 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5516 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5517 5518 Cmat->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIAIJ; 5519 5520 *C = Cmat; 5521 PetscFunctionReturn(0); 5522 } 5523 5524 /* ----------------------------------------------------------------*/ 5525 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5526 { 5527 PetscErrorCode ierr; 5528 5529 PetscFunctionBegin; 5530 if (scall == MAT_INITIAL_MATRIX) { 5531 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5532 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5533 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5534 } 5535 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5536 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5537 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5538 PetscFunctionReturn(0); 5539 } 5540 5541 /*MC 5542 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5543 5544 Options Database Keys: 5545 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5546 5547 Level: beginner 5548 5549 .seealso: MatCreateAIJ() 5550 M*/ 5551 5552 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5553 { 5554 Mat_MPIAIJ *b; 5555 PetscErrorCode ierr; 5556 PetscMPIInt size; 5557 5558 PetscFunctionBegin; 5559 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5560 5561 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5562 B->data = (void*)b; 5563 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5564 B->assembled = PETSC_FALSE; 5565 B->insertmode = NOT_SET_VALUES; 5566 b->size = size; 5567 5568 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5569 5570 /* build cache for off array entries formed */ 5571 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5572 5573 b->donotstash = PETSC_FALSE; 5574 b->colmap = 0; 5575 b->garray = 0; 5576 b->roworiented = PETSC_TRUE; 5577 5578 /* stuff used for matrix vector multiply */ 5579 b->lvec = NULL; 5580 b->Mvctx = NULL; 5581 5582 /* stuff for MatGetRow() */ 5583 b->rowindices = 0; 5584 b->rowvalues = 0; 5585 b->getrowactive = PETSC_FALSE; 5586 5587 /* flexible pointer used in CUSP/CUSPARSE classes */ 5588 b->spptr = NULL; 5589 5590 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5591 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5593 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5599 #if defined(PETSC_HAVE_MKL_SPARSE) 5600 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5601 #endif 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5604 #if defined(PETSC_HAVE_ELEMENTAL) 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5606 #endif 5607 #if defined(PETSC_HAVE_HYPRE) 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5609 #endif 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5615 #if defined(PETSC_HAVE_HYPRE) 5616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5617 #endif 5618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5619 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5620 PetscFunctionReturn(0); 5621 } 5622 5623 /*@C 5624 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5625 and "off-diagonal" part of the matrix in CSR format. 5626 5627 Collective on MPI_Comm 5628 5629 Input Parameters: 5630 + comm - MPI communicator 5631 . m - number of local rows (Cannot be PETSC_DECIDE) 5632 . n - This value should be the same as the local size used in creating the 5633 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5634 calculated if N is given) For square matrices n is almost always m. 5635 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5636 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5637 . i - row indices for "diagonal" portion of matrix 5638 . j - column indices 5639 . a - matrix values 5640 . oi - row indices for "off-diagonal" portion of matrix 5641 . oj - column indices 5642 - oa - matrix values 5643 5644 Output Parameter: 5645 . mat - the matrix 5646 5647 Level: advanced 5648 5649 Notes: 5650 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5651 must free the arrays once the matrix has been destroyed and not before. 5652 5653 The i and j indices are 0 based 5654 5655 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5656 5657 This sets local rows and cannot be used to set off-processor values. 5658 5659 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5660 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5661 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5662 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5663 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5664 communication if it is known that only local entries will be set. 5665 5666 .keywords: matrix, aij, compressed row, sparse, parallel 5667 5668 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5669 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5670 @*/ 5671 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5672 { 5673 PetscErrorCode ierr; 5674 Mat_MPIAIJ *maij; 5675 5676 PetscFunctionBegin; 5677 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5678 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5679 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5680 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5681 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5682 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5683 maij = (Mat_MPIAIJ*) (*mat)->data; 5684 5685 (*mat)->preallocated = PETSC_TRUE; 5686 5687 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5688 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5689 5690 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5691 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5692 5693 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5694 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5695 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5696 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5697 5698 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5699 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5702 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5703 PetscFunctionReturn(0); 5704 } 5705 5706 /* 5707 Special version for direct calls from Fortran 5708 */ 5709 #include <petsc/private/fortranimpl.h> 5710 5711 /* Change these macros so can be used in void function */ 5712 #undef CHKERRQ 5713 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5714 #undef SETERRQ2 5715 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5716 #undef SETERRQ3 5717 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5718 #undef SETERRQ 5719 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5720 5721 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5722 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5723 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5724 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5725 #else 5726 #endif 5727 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5728 { 5729 Mat mat = *mmat; 5730 PetscInt m = *mm, n = *mn; 5731 InsertMode addv = 

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   ba must be refreshed from the new B before ap2 is recomputed, since the
                   old B (and hence the old ba) was destroyed by MatDisAssemble_MPIAIJ() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
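            /* The entry belongs to the off-diagonal block B: col is either a compacted local column
               index of B (found through colmap when the matrix was previously assembled) or a global
               column index (matrix never assembled, or just disassembled above) */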
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
