#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
M*/

/* Propagate a block-size change to the two sequential pieces of the parallel matrix:
   the diagonal block A gets both block sizes, while the off-diagonal block B gets only
   the row block size (its column space is the compressed set of off-process columns,
   so its column block size stays 1). */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) { /* A and B are created together; if A exists, so does B */
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Create an IS (in global numbering) of the locally owned rows that contain at least
   one numerically nonzero entry, looking at both the diagonal (A) and off-diagonal (B)
   blocks.  On return *keptrows is NULL when NO process found a zero row, i.e. all rows
   of the global matrix are nonzero. */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  /* first pass: count the locally zero rows (no stored entries, or all stored values 0.0) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {  /* structurally empty row */
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;  /* stored entries exist but all are numerically zero */
ok1:;
  }
  /* n0rows = total number of zero rows over all processes; if none, leave *keptrows NULL */
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  /* second pass: record the global indices of the rows that are kept (nonzero) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* the IS takes ownership of rows[] */
  ierr =
ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Set (or add to) the diagonal of Y from vector D.  When Y is assembled and its row
   and column layouts are congruent, the whole diagonal lives in the local diagonal
   block so the operation is forwarded to the sequential part; otherwise fall back to
   the generic implementation. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Create an IS (global numbering) of the locally owned rows whose diagonal entry is
   zero; diagonal entries all live in the sequential diagonal block aij->A. */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  /* shift local row numbers to global numbering; the IS takes ownership of rows[] */
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Compute the requested norm of every global column of A.  Each process accumulates
   the contributions of its diagonal block (A, local columns offset by cmap->rstart)
   and off-diagonal block (B, columns mapped through garray) into a work array of
   GLOBAL length n (not scalable in memory), then combines across processes:
   sum for 1- and 2-norms, max for the infinity norm. */
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* accumulate squared magnitudes; note |a*a| == |a|^2 also holds for complex scalars */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* finish the 2-norm: sqrt of the summed squares */
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

/* Create an IS (global numbering) of the locally owned rows that have an entry outside
   the block diagonal: rows flagged by the sequential test on the diagonal block A,
   united with every row that has an entry in the off-diagonal block B. */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  /* concatenate the two local lists, then sort and remove duplicates */
  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
for (i=0; i<n; i++) iis[i] += rstart;  /* local -> global row numbering; the IS takes ownership of iis[] */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* only rank 0 holds the global matrix; gmat is only dereferenced there */
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);  /* only rank 0 knows gmat's block sizes */
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    /* gather the local row counts, then turn them into ownership offsets via prefix sum */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts; ld[i] counts entries
         strictly left of the diagonal block in row i (used later for MAT_REUSE_MATRIX) */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 uses its own slice of gmat directly */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens temporarily becomes the diagonal-block counts */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to the full row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* stash ld so a later MAT_REUSE_MATRIX call can split incoming values into A and B */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;  /* keep original pointer: gmataa is advanced below */
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       each row arrives as [left-of-diagonal B entries | A entries | right-of-diagonal B entries],
       with ld[] giving the count of the left part */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right part of previous row plus left part of this row */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right part of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash table: stores global column+1 -> local column+1 (shifted by one so that
     the table's "not found" value 0 is distinguishable from column 0) */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense array of global length N; entry 0 means "column not present locally",
     otherwise it holds local column+1 */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/*
  Insert (or add) value at (row,col) of the diagonal block A.  Searches the sorted row
  (binary narrowing followed by a linear scan); if the entry is absent and insertion is
  allowed, later entries in the row are shifted up to make room (reallocating if the
  row is full).  Relies on the local variables set up in MatSetValues_MPIAIJ
  (rp1, ap1, nrow1, low1, high1, lastcol1, nonew, ...).  orow/ocol are the global
  indices, used only in the error message.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

/*
  Same as MatSetValues_SeqAIJ_A_Private but for the off-diagonal block B
  (variables rp2, ap2, nrow2, low2, high2, lastcol2, ...).  Note: unlike the A
  variant, zero values are dropped here regardless of row == col, since B holds
  no diagonal entries.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

/* Replace the numerical values of one (locally owned, global index) row.  v[] holds
   the full row in ascending global column order: off-diagonal entries left of the
   diagonal block, then the diagonal block, then off-diagonal entries to its right.
   The nonzero pattern must already match; only values are copied. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;  /* convert to local row number */
  for (l=0; l<b->i[row+1]-b->i[row]; l++)
{
    if (garray[b->j[b->i[row]+l]] > diag) break;  /* first B entry past the diagonal block */
  }
  /* left of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatSetValues for MPIAIJ: locally owned entries are inserted directly into the
   diagonal (A) or off-diagonal (B) sequential block via the macros above; entries
   destined for other processes are stashed and communicated during assembly. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row indices are ignored by convention */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the search state the macros expect */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          /* entry belongs to the diagonal block A */
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* entry belongs to the off-diagonal block B */
          if (mat->was_assembled) {
            /* B uses compact local column numbering; translate via the colmap */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* new off-diagonal column: disassemble B back to global numbering */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];  /* before first assembly B still uses global column numbers */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* off-process row: stash it for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
 No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
  Mat        A      = aij->A; /* diagonal part of the matrix */
  Mat        B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;  /* local column numbering for A */
        dnz++;
      } else { /* off-diagonal entries: kept in global numbering until assembly */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
 No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
 would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij   = (Mat_MPIAIJ*)mat->data;
  Mat         A      = aij->A; /* diagonal part of the matrix */
  Mat         B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b     = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/* Retrieve individual entries of the matrix; only locally owned rows are supported.
   Diagonal-block columns are read from A; off-process columns are looked up in the
   colmap and read from B, returning 0.0 for columns with no local storage. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend =
mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* entry is in the diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate the global column through the colmap */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray check guards against a stale/aliased colmap entry */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

/* Start assembly: initiate the scatter of stashed off-process entries.  A no-op when
   stashing is disabled or the user promised no off-process entries. */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Finish assembly: drain the stash of off-process entries into the local blocks,
   assemble A and B, synchronize disassembly state across processes, set up the
   scatter for MatMult on first final assembly, and update the global nonzerostate. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of the was_assembled flags: result is true only if ALL ranks stayed assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row workspace is invalid after assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Zero all stored values of both the diagonal and off-diagonal blocks. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Zero the given global rows, optionally placing 'diag' on the diagonal and fixing
   the right-hand side b so that the solution keeps x in those rows. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
PetscErrorCode ierr; 878 879 PetscFunctionBegin; 880 /* get locally owned rows */ 881 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 882 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 883 /* fix right hand side if needed */ 884 if (x && b) { 885 const PetscScalar *xx; 886 PetscScalar *bb; 887 888 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 889 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 890 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 891 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 892 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 893 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 894 } 895 896 sA = mat->A->nonzerostate; 897 sB = mat->B->nonzerostate; 898 899 if (diag != 0.0 && cong) { 900 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 901 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 902 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 903 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 904 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 905 PetscInt nnwA, nnwB; 906 PetscBool nnzA, nnzB; 907 908 nnwA = aijA->nonew; 909 nnwB = aijB->nonew; 910 nnzA = aijA->keepnonzeropattern; 911 nnzB = aijB->keepnonzeropattern; 912 if (!nnzA) { 913 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 914 aijA->nonew = 0; 915 } 916 if (!nnzB) { 917 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 918 aijB->nonew = 0; 919 } 920 /* Must zero here before the next loop */ 921 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 922 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 923 for (r = 0; r < len; ++r) { 924 
      /* insert the requested value on the diagonal of each zeroed local row;
         rows whose global index lies beyond the column space have no diagonal entry */
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    /* restore the original insertion (reallocation) policies of both blocks */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0.0: simply zero the rows of both the diagonal and off-diagonal blocks */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate: bump the parallel state only if some process changed its local pattern */
  lch  = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/*
   MatZeroRowsColumns_MPIAIJ - Zeros the requested global rows and the matching columns of a
   parallel AIJ matrix.  The diagonal block is delegated to MatZeroRowsColumns() on l->A; the
   off-diagonal block is zeroed here with the help of a scattered 0/1 column mask.  When both
   x and b are provided, b is updated with b_i -= a_ij * x_j for every eliminated column j
   (row and column layouts must then be congruent).
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;   /* local row count; NOTE(review): reused below as a per-row nonzero count */
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;   /* -1 marks "row not zeroed" */
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers: lrows[0..len) now lists the local rows to zero */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix: scatter a 0/1 mask of eliminated columns into ghost space */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring in the ghost entries of x needed for the right-hand side fix-up below */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {   /* this ghost column was eliminated on its owner */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* yy = A*xx: overlap the forward scatter of ghost values of xx with the diagonal-block
   product, then add the off-diagonal-block contribution */
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);            /* yy  = A_diag*xx, overlapped with communication */
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); /* yy += A_off*ghost(xx) */
  PetscFunctionReturn(0);
}

/* Apply only the diagonal (local sequential) block: xx = diagblock(A)*bb */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx, with the same communication/computation overlap as MatMult_MPIAIJ */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;   /* use the alternate MPI-1 scatter when requested */
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* yy = A^T*xx: local transpose products, then a reverse-mode scatter-add of the
   off-diagonal contributions back to their owning processes */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Tests whether Bmat == Amat^T (within tol); collective, result identical on all ranks */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data;
  Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  /* all ranks must pass the cheap local test before attempting the expensive one */
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme[] = every global index outside the local ownership range [first,last) */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* compare A(Me,Notme) against B(Notme,Me) */
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* A matrix is symmetric (within tol) exactly when it is its own transpose */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = yy + A^T*xx: local transpose products, then a reverse-mode scatter-add */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Scale every entry of the matrix by aa; the two blocks are scaled independently */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release all storage held by the MPIAIJ implementation and detach the composed
   methods that were registered on the matrix at creation/conversion time */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatView_MPIAIJ_Binary - Writes the parallel matrix to a binary viewer.  Process 0 writes
   the header and then streams in each other process's data (row lengths, column indices,
   values) one at a time, throttled by the viewer flow control so only a bounded number of
   messages is in flight.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;   /* local nonzero count over both blocks */
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    /* header: classid, global rows, global cols, global nonzeros (summed below) */
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* on rank 0 this is replaced by the largest nz of any process, so its buffer fits every incoming message */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    /* emit each row's global column indices in ascending order: off-diagonal columns left
       of the diagonal block, then the diagonal block, then the remaining off-diagonal
       columns (assumes garray is sorted ascending, the MPIAIJ convention -- verify) */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values, in the same merged order as the indices above */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  /* record the block size in the companion info file for MatLoad() */
  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Shared viewer backend.  Handles the special ASCII
   formats and the single-process shortcuts directly; otherwise assembles the whole matrix
   onto process 0 and views it there.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across the communicator */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* NOTE(review): a PetscBool is passed through a PetscInt** slot here purely as a
         "are inodes in use" flag -- confirm against MatInodeGetInodeSizes() */
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* process 0 requests every row/column; all others request none */
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
    /* The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (!rank) {
       ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
       A    = AA[0];
       Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
    */
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatch MatView to the shared ASCII/draw/socket/binary backend; other viewer types are ignored */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatSOR_MPIAIJ - Local (processor-block) SOR.  Each outer iteration refreshes the ghost
   values of xx, forms the modified right-hand side bb1 = bb - B*ghost(xx), and runs the
   requested sweep on the diagonal block only.  SOR_EISENSTAT is handled via the Eisenstat
   trick; a true parallel SOR with global ordering is not supported.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* a work vector is needed unless a single zero-initial-guess sweep suffices;
     note ~flag & SOR_ZERO_INITIAL_GUESS is nonzero when the bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no ghost values since the initial guess is zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* lazily cache the diagonal; used when no MatMultDiagonalBlock implementation exists */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0;
i<m; i++) work[i] = A->rmap->rstart + i; 1662 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1663 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1664 1665 /* Invert column permutation to find out where my columns should go */ 1666 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1667 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1668 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1669 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1670 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1671 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1672 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1673 1674 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1675 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1676 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1677 1678 /* Find out where my gcols should go */ 1679 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1680 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1681 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1682 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1683 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1684 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1685 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1686 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1687 1688 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1689 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1690 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1691 for (i=0; i<m; i++) { 1692 PetscInt row = rdest[i],rowner; 1693 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1694 for (j=ai[i]; j<ai[i+1]; j++) { 1695 PetscInt cowner,col = 
cdest[aj[j]]; 1696 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1697 if (rowner == cowner) dnnz[i]++; 1698 else onnz[i]++; 1699 } 1700 for (j=bi[i]; j<bi[i+1]; j++) { 1701 PetscInt cowner,col = gcdest[bj[j]]; 1702 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1703 if (rowner == cowner) dnnz[i]++; 1704 else onnz[i]++; 1705 } 1706 } 1707 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1708 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1709 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1710 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1711 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1712 1713 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1714 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1715 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1716 for (i=0; i<m; i++) { 1717 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1718 PetscInt j0,rowlen; 1719 rowlen = ai[i+1] - ai[i]; 1720 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1721 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1722 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1723 } 1724 rowlen = bi[i+1] - bi[i]; 1725 for (j0=j=0; j<rowlen; j0=j) { 1726 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1727 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1728 } 1729 } 1730 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1731 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1732 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1733 ierr = 
MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1734 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1735 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1736 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1737 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1738 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1739 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1740 *B = Aperm; 1741 PetscFunctionReturn(0); 1742 } 1743 1744 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1745 { 1746 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1751 if (ghosts) *ghosts = aij->garray; 1752 PetscFunctionReturn(0); 1753 } 1754 1755 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1756 { 1757 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1758 Mat A = mat->A,B = mat->B; 1759 PetscErrorCode ierr; 1760 PetscReal isend[5],irecv[5]; 1761 1762 PetscFunctionBegin; 1763 info->block_size = 1.0; 1764 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1765 1766 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1767 isend[3] = info->memory; isend[4] = info->mallocs; 1768 1769 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1770 1771 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1772 isend[3] += info->memory; isend[4] += info->mallocs; 1773 if (flag == MAT_LOCAL) { 1774 info->nz_used = isend[0]; 1775 info->nz_allocated = isend[1]; 1776 info->nz_unneeded = isend[2]; 1777 info->memory = isend[3]; 1778 info->mallocs = isend[4]; 1779 } else if (flag == MAT_GLOBAL_MAX) { 1780 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1781 1782 info->nz_used = irecv[0]; 1783 info->nz_allocated = irecv[1]; 1784 info->nz_unneeded = irecv[2]; 1785 
info->memory = irecv[3]; 1786 info->mallocs = irecv[4]; 1787 } else if (flag == MAT_GLOBAL_SUM) { 1788 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1789 1790 info->nz_used = irecv[0]; 1791 info->nz_allocated = irecv[1]; 1792 info->nz_unneeded = irecv[2]; 1793 info->memory = irecv[3]; 1794 info->mallocs = irecv[4]; 1795 } 1796 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1797 info->fill_ratio_needed = 0; 1798 info->factor_mallocs = 0; 1799 PetscFunctionReturn(0); 1800 } 1801 1802 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1803 { 1804 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1805 PetscErrorCode ierr; 1806 1807 PetscFunctionBegin; 1808 switch (op) { 1809 case MAT_NEW_NONZERO_LOCATIONS: 1810 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1811 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1812 case MAT_KEEP_NONZERO_PATTERN: 1813 case MAT_NEW_NONZERO_LOCATION_ERR: 1814 case MAT_USE_INODES: 1815 case MAT_IGNORE_ZERO_ENTRIES: 1816 MatCheckPreallocated(A,1); 1817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1818 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_ROW_ORIENTED: 1821 MatCheckPreallocated(A,1); 1822 a->roworiented = flg; 1823 1824 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1825 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1826 break; 1827 case MAT_NEW_DIAGONALS: 1828 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1829 break; 1830 case MAT_IGNORE_OFF_PROC_ENTRIES: 1831 a->donotstash = flg; 1832 break; 1833 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1834 case MAT_SPD: 1835 case MAT_SYMMETRIC: 1836 case MAT_STRUCTURALLY_SYMMETRIC: 1837 case MAT_HERMITIAN: 1838 case MAT_SYMMETRY_ETERNAL: 1839 break; 1840 case MAT_SUBMAT_SINGLEIS: 1841 A->submat_singleis = flg; 1842 break; 1843 case MAT_STRUCTURE_ONLY: 1844 /* The option is handled directly by MatSetOption() */ 1845 break; 
1846 default: 1847 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1848 } 1849 PetscFunctionReturn(0); 1850 } 1851 1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1853 { 1854 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1855 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1856 PetscErrorCode ierr; 1857 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1858 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1859 PetscInt *cmap,*idx_p; 1860 1861 PetscFunctionBegin; 1862 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1863 mat->getrowactive = PETSC_TRUE; 1864 1865 if (!mat->rowvalues && (idx || v)) { 1866 /* 1867 allocate enough space to hold information from the longest row. 1868 */ 1869 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1870 PetscInt max = 1,tmp; 1871 for (i=0; i<matin->rmap->n; i++) { 1872 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1873 if (max < tmp) max = tmp; 1874 } 1875 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1876 } 1877 1878 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1879 lrow = row - rstart; 1880 1881 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1882 if (!v) {pvA = 0; pvB = 0;} 1883 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1884 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1885 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1886 nztot = nzA + nzB; 1887 1888 cmap = mat->garray; 1889 if (v || idx) { 1890 if (nztot) { 1891 /* Sort by increasing column numbers, assuming A and B already sorted */ 1892 PetscInt imark = -1; 1893 if (v) { 1894 *v = v_p = mat->rowvalues; 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1897 else break; 1898 } 1899 imark = i; 1900 for 
(i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];          /* diagonal-block values */
        for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; /* off-diagonal values to the right of the diagonal block */
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* imark was not computed above (v == NULL); compute it here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ();
   simply clears the "row active" flag guarded by MatGetRow_MPIAIJ().
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   MatNorm_MPIAIJ - Frobenius, one- and infinity-norms of an MPIAIJ matrix.
   On a single process (aij->size == 1) defers entirely to the SeqAIJ block;
   otherwise combines both blocks and reduces across the communicator.
   The two-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per GLOBAL column, sum over processes, take the max */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so only the max must be reduced */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ *Aloc
=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt       M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
  PetscErrorCode ierr;
  Mat            B,A_diag,*B_diag;
  MatScalar      *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* Create the transpose with swapped sizes/block sizes and the counts computed above */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* Insert row i of a->B as (global) column "row" of the transpose */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: steal B's data into A's header */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIAIJ - Scales the matrix by diag(ll) on the left and diag(rr)
   on the right.  The scatter of rr to the ghost entries is overlapped with the
   scaling of the local blocks.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* MatSetUnfactored_MPIAIJ - clears the factored state of the diagonal block */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatEqual_MPIAIJ - True only when both the diagonal and off-diagonal blocks are
   equal on every process (logical AND over the communicator).
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    /* only compare the off-diagonal blocks when the diagonal blocks already match */
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCopy_MPIAIJ - Copies A into B; uses the fast per-block copy only when both
   matrices share the nonzero pattern and the same copy implementation.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy.
*/ 2163 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2164 /* because of the column compression in the off-processor part of the matrix a->B, 2165 the number of columns in a->B and b->B may be different, hence we cannot call 2166 the MatCopy() directly on the two parts. If need be, we can provide a more 2167 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2168 then copying the submatrices */ 2169 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2170 } else { 2171 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2172 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2173 } 2174 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2175 PetscFunctionReturn(0); 2176 } 2177 2178 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2179 { 2180 PetscErrorCode ierr; 2181 2182 PetscFunctionBegin; 2183 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 /* 2188 Computes the number of nonzeros per row needed for preallocation when X and Y 2189 have different nonzero structure. 
2190 */ 2191 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2192 { 2193 PetscInt i,j,k,nzx,nzy; 2194 2195 PetscFunctionBegin; 2196 /* Set the number of nonzeros in the new matrix */ 2197 for (i=0; i<m; i++) { 2198 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2199 nzx = xi[i+1] - xi[i]; 2200 nzy = yi[i+1] - yi[i]; 2201 nnz[i] = 0; 2202 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2203 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2204 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2205 nnz[i]++; 2206 } 2207 for (; k<nzy; k++) nnz[i]++; 2208 } 2209 PetscFunctionReturn(0); 2210 } 2211 2212 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2213 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2214 { 2215 PetscErrorCode ierr; 2216 PetscInt m = Y->rmap->N; 2217 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2218 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2219 2220 PetscFunctionBegin; 2221 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2226 { 2227 PetscErrorCode ierr; 2228 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2229 PetscBLASInt bnz,one=1; 2230 Mat_SeqAIJ *x,*y; 2231 2232 PetscFunctionBegin; 2233 if (str == SAME_NONZERO_PATTERN) { 2234 PetscScalar alpha = a; 2235 x = (Mat_SeqAIJ*)xx->A->data; 2236 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2237 y = (Mat_SeqAIJ*)yy->A->data; 2238 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2239 x = (Mat_SeqAIJ*)xx->B->data; 2240 y = (Mat_SeqAIJ*)yy->B->data; 2241 ierr = 
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* Different nonzero structure: preallocate a union matrix B, compute
       B = a*X + Y into it, then replace Y's internals with B's */
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* MatConjugate_MPIAIJ - complex-conjugates both blocks; no-op for real scalars */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

/* MatRealPart_MPIAIJ - keeps only the real part of both blocks */
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
MatRealPart(a->A);CHKERRQ(ierr); 2291 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2292 PetscFunctionReturn(0); 2293 } 2294 2295 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2296 { 2297 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2298 PetscErrorCode ierr; 2299 2300 PetscFunctionBegin; 2301 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2302 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2307 { 2308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2309 PetscErrorCode ierr; 2310 PetscInt i,*idxb = 0; 2311 PetscScalar *va,*vb; 2312 Vec vtmp; 2313 2314 PetscFunctionBegin; 2315 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2316 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2317 if (idx) { 2318 for (i=0; i<A->rmap->n; i++) { 2319 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2320 } 2321 } 2322 2323 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2324 if (idx) { 2325 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2326 } 2327 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2328 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2329 2330 for (i=0; i<A->rmap->n; i++) { 2331 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2332 va[i] = vb[i]; 2333 if (idx) idx[i] = a->garray[idxb[i]]; 2334 } 2335 } 2336 2337 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2338 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2339 ierr = PetscFree(idxb);CHKERRQ(ierr); 2340 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2341 PetscFunctionReturn(0); 2342 } 2343 2344 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2345 { 2346 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2347 PetscErrorCode ierr; 2348 PetscInt i,*idxb = 0; 2349 PetscScalar *va,*vb; 2350 Vec vtmp; 2351 2352 PetscFunctionBegin; 2353 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2354 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2355 if (idx) { 2356 for (i=0; i<A->cmap->n; i++) { 2357 if 
(PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2358 } 2359 } 2360 2361 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2362 if (idx) { 2363 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2364 } 2365 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2366 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2367 2368 for (i=0; i<A->rmap->n; i++) { 2369 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2370 va[i] = vb[i]; 2371 if (idx) idx[i] = a->garray[idxb[i]]; 2372 } 2373 } 2374 2375 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2376 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2377 ierr = PetscFree(idxb);CHKERRQ(ierr); 2378 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2383 { 2384 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2385 PetscInt n = A->rmap->n; 2386 PetscInt cstart = A->cmap->rstart; 2387 PetscInt *cmap = mat->garray; 2388 PetscInt *diagIdx, *offdiagIdx; 2389 Vec diagV, offdiagV; 2390 PetscScalar *a, *diagA, *offdiagA; 2391 PetscInt r; 2392 PetscErrorCode ierr; 2393 2394 PetscFunctionBegin; 2395 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2396 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2397 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2398 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2399 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2400 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2401 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2402 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2403 for (r = 0; r < n; ++r) { 2404 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2405 a[r] = diagA[r]; 2406 idx[r] = cstart + diagIdx[r]; 2407 } else { 2408 a[r] = offdiagA[r]; 2409 idx[r] = cmap[offdiagIdx[r]]; 2410 } 2411 } 2412 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2413 ierr = 
VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetRowMax_MPIAIJ - For each locally owned row, a[r] is the candidate of
   larger magnitude between the diagonal and off-diagonal blocks' row maxima;
   idx[r] is its global column index.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    /* NOTE(review): idx is dereferenced unconditionally here, unlike
       MatGetRowMaxAbs_MPIAIJ which guards it — confirm callers always pass idx */
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetSeqNonzeroStructure_MPIAIJ - Returns a sequential matrix with the
   nonzero structure of the whole parallel matrix (values are not gathered).
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatInvertBlockDiagonal_MPIAIJ - Delegates to the diagonal block and propagates
   any factorization error state back to the parallel matrix.
*/
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
   MatSetRandom_MPIAIJ - Fills the matrix with random values; requires the matrix
   to be assembled or at least preallocated.
*/
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  if (x->assembled) {
    ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  } else {
    /* not yet assembled: skip the local (diagonal) column range when filling B */
    ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Installs either the scalable or the default IncreaseOverlap implementation */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation, if the type registered one */
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Processes -mat_increase_overlap_scalable from the options database. */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  /* the displayed default reflects the currently installed implementation */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Y = Y + a*I.  Ensures the diagonal block has room for diagonal entries before
   delegating to MatShift_Basic(). */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij  = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: one nonzero per row suffices for the diagonal */
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* preallocated but diagonal block is empty: re-preallocate it while
       preserving its nonew (no-new-nonzeros) option */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Reports whether a diagonal entry is structurally missing; only the local
   diagonal block can hold diagonal entries, so mat->A answers the query. */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr =
MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d += rstart; /* convert mat->A's local row index into a global index */

  }
  PetscFunctionReturn(0);
}

/* Inverts the variable-size block diagonal; entirely local, forwarded to mat->A. */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ.  The /*nn*+/ markers give the slot index in the
   _MatOps structure; a 0 entry means the operation is not provided by this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       MatRARt_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

/* Saves a copy of the current numerical values of both local blocks so they
   can be restored later with MatRetrieveValues(). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Restores the numerical values previously saved with MatStoreValues(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Preallocates the diagonal (d_nz/d_nnz) and off-diagonal (o_nz/o_nnz) blocks.
   Any previous column map, ghost vector and scatter are destroyed because the
   nonzero pattern may change. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* on a single process there is no off-diagonal part, so B gets zero columns */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ?
B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    /* first preallocation call: the diagonal block must be created as well */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Resets the preallocation of both local blocks and invalidates the
   communication structures (column map, ghost vector, scatter), returning the
   matrix to an unassembled, preallocated state. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicates a MATMPIAIJ matrix; values are copied (or not) per cpvalues. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode
ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() work space is not copied; it is rebuilt on demand */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr =
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  if (oldmat->Mvctx_mpi1) {
    ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
  }

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Loads a matrix from a viewer, dispatching on the viewer type (binary or HDF5). */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool      isbinary, ishdf5;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
  if (isbinary) {
    ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Reads a MATMPIAIJ matrix from a PETSc binary viewer.
   Process 0 reads the header, row lengths, column indices and values from the
   file and ships each other process its portion (MPIULong_Send/MPIULong_Recv);
   row lengths are then split into diagonal/off-diagonal counts to preallocate
   before the values are inserted.  Honors -matload_block_size and any local
   sizes already set on newMat. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    /* only process 0 touches the file */
    ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1; /* unused, but compilers complain */

  /* turn the per-process counts into an ownership-range prefix sum */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* preallocate with diagonal-only lengths, then restore the per-row totals */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns.
*/
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* every process must be taking all of its own columns for the fast path */
  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
    mat - matrix
    isrow - parallel row index set; its local indices are a subset of local rows of mat,
          i.e., mat->rstart <= isrow[i] < mat->rend
    iscol - parallel column index set; its local indices are a subset of local columns of mat,
          i.e., mat->cstart <= iscol[i] < mat->cend
  Output Parameter:
    isrow_d,iscol_d - sequential
row and column index sets for retrieving mat->A
    iscol_o - sequential column index set for retrieving mat->B
    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
             (allocated here; ownership passes to the caller, who must PetscFree() it)
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  ierr     = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  isstart -= ncols;  /* exclusive prefix sum: first global position of this process's iscol entries */
  ierr     = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  ierr   = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr   = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr   = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  ierr  = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr  = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* entries still at -1 were not selected by iscol */
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
    if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
    if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
if (n) {
      /* only update the off-diagonal part when it has columns */
      ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);

    /* Create local submatrices Asub and Bsub */
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);

    /* Create submatrix M */
    ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
    n    = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);

      ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
      j    = 0;
      ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        /* advance j until the two global-column lists line up */
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
      }
      ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);

      ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);

    } else if (BsubN < n) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
    }

    ierr = PetscFree(garray);CHKERRQ(ierr);
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Top-level MatCreateSubMatrix() implementation for MATMPIAIJ: dispatches to
   specialized paths when isrow/iscol share the matrix's row/column distribution,
   otherwise falls back to the nonscalable ISAllGather()-based routine. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the objects composed on the previous MAT_INITIAL_MATRIX call tell us which path was taken */
    ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if
isrow has same processor distribution as mat */ 3401 sameDist[0] = PETSC_FALSE; 3402 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3403 if (!n) { 3404 sameDist[0] = PETSC_TRUE; 3405 } else { 3406 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3407 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3408 if (i >= start && j < end) { 3409 sameDist[0] = PETSC_TRUE; 3410 } 3411 } 3412 3413 /* Check if iscol has same processor distribution as mat */ 3414 sameDist[1] = PETSC_FALSE; 3415 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3416 if (!n) { 3417 sameDist[1] = PETSC_TRUE; 3418 } else { 3419 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3420 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3421 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3422 } 3423 3424 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3425 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3426 sameRowDist = tsameDist[0]; 3427 } 3428 3429 if (sameRowDist) { 3430 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3431 /* isrow and iscol have same processor distribution as mat */ 3432 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3433 PetscFunctionReturn(0); 3434 } else { /* sameRowDist */ 3435 /* isrow has same processor distribution as mat */ 3436 if (call == MAT_INITIAL_MATRIX) { 3437 PetscBool sorted; 3438 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3439 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3440 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3441 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3442 3443 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3444 if (sorted) { 3445 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3446 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3447 PetscFunctionReturn(0); 3448 } 3449 } else { /* call == MAT_REUSE_MATRIX */ 3450 IS iscol_sub; 3451 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3452 if (iscol_sub) { 3453 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3454 PetscFunctionReturn(0); 3455 } 3456 } 3457 } 3458 } 3459 3460 /* General case: iscol -> iscol_local which has global size of iscol */ 3461 if (call == MAT_REUSE_MATRIX) { 3462 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3463 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3464 } else { 3465 if (!iscol_local) { 3466 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3467 } 3468 } 3469 3470 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3471 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3472 3473 if (call == MAT_INITIAL_MATRIX) { 3474 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3475 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3476 } 3477 PetscFunctionReturn(0); 3478 } 3479 3480 /*@C 3481 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3482 and "off-diagonal" part of the matrix in CSR format. 3483 3484 Collective on MPI_Comm 3485 3486 Input Parameters: 3487 + comm - MPI communicator 3488 . A - "diagonal" portion of matrix 3489 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3490 - garray - global index of B columns 3491 3492 Output Parameter: 3493 . 
mat - the matrix, with input A as its local diagonal matrix 3494 Level: advanced 3495 3496 Notes: 3497 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3498 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3499 3500 .seealso: MatCreateMPIAIJWithSplitArrays() 3501 @*/ 3502 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3503 { 3504 PetscErrorCode ierr; 3505 Mat_MPIAIJ *maij; 3506 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3507 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3508 PetscScalar *oa=b->a; 3509 Mat Bnew; 3510 PetscInt m,n,N; 3511 3512 PetscFunctionBegin; 3513 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3514 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3515 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3516 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3517 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3518 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3519 3520 /* Get global columns of mat */ 3521 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3522 3523 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3524 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3525 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3526 maij = (Mat_MPIAIJ*)(*mat)->data; 3527 3528 (*mat)->preallocated = PETSC_TRUE; 3529 3530 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3531 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3532 3533 /* Set A as diagonal portion of *mat */ 3534 maij->A = A; 3535 3536 nz = oi[m]; 3537 for (i=0; i<nz; i++) { 3538 col = oj[i]; 3539 oj[i] 
= garray[col]; 3540 } 3541 3542 /* Set Bnew as off-diagonal portion of *mat */ 3543 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3544 bnew = (Mat_SeqAIJ*)Bnew->data; 3545 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3546 maij->B = Bnew; 3547 3548 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3549 3550 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3551 b->free_a = PETSC_FALSE; 3552 b->free_ij = PETSC_FALSE; 3553 ierr = MatDestroy(&B);CHKERRQ(ierr); 3554 3555 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3556 bnew->free_a = PETSC_TRUE; 3557 bnew->free_ij = PETSC_TRUE; 3558 3559 /* condense columns of maij->B */ 3560 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3561 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3562 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3563 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3564 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3565 PetscFunctionReturn(0); 3566 } 3567 3568 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3569 3570 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3571 { 3572 PetscErrorCode ierr; 3573 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3574 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3575 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3576 Mat M,Msub,B=a->B; 3577 MatScalar *aa; 3578 Mat_SeqAIJ *aij; 3579 PetscInt *garray = a->garray,*colsub,Ncols; 3580 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3581 IS iscol_sub,iscmap; 3582 const PetscInt *is_idx,*cmap; 3583 PetscBool allcolumns=PETSC_FALSE; 3584 MPI_Comm comm; 3585 3586 
  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the cached column map, compact column IS and sequential submatrix that the
       MAT_INITIAL_MATRIX call composed on *newmat below */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    /* Refill the cached sequential submatrix with current values of mat */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr  = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr  = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr  = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      /* keep only the requested columns this process actually owns (diagonal block)
         or couples to (off-diagonal block, matched against sorted garray) */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; /* translate compact column -> submatrix column */
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
3782 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3783 3784 Note: This requires a sequential iscol with all indices. 3785 */ 3786 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3787 { 3788 PetscErrorCode ierr; 3789 PetscMPIInt rank,size; 3790 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3791 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3792 Mat M,Mreuse; 3793 MatScalar *aa,*vwork; 3794 MPI_Comm comm; 3795 Mat_SeqAIJ *aij; 3796 PetscBool colflag,allcolumns=PETSC_FALSE; 3797 3798 PetscFunctionBegin; 3799 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3800 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3801 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3802 3803 /* Check for special case: each processor gets entire matrix columns */ 3804 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3805 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3806 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3807 3808 if (call == MAT_REUSE_MATRIX) { 3809 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3810 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3811 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3812 } else { 3813 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3814 } 3815 3816 /* 3817 m - number of local rows 3818 n - number of columns (same on all processors) 3819 rstart - first row in new global matrix generated 3820 */ 3821 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3822 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3823 if (call == MAT_INITIAL_MATRIX) { 3824 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3825 ii = aij->i; 3826 jj = aij->j; 3827 3828 /* 3829 
Determine the number of non-zeros in the diagonal and off-diagonal 3830 portions of the matrix in order to do correct preallocation 3831 */ 3832 3833 /* first get start and end of "diagonal" columns */ 3834 if (csize == PETSC_DECIDE) { 3835 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3836 if (mglobal == n) { /* square matrix */ 3837 nlocal = m; 3838 } else { 3839 nlocal = n/size + ((n % size) > rank); 3840 } 3841 } else { 3842 nlocal = csize; 3843 } 3844 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3845 rstart = rend - nlocal; 3846 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3847 3848 /* next, compute all the lengths */ 3849 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3850 olens = dlens + m; 3851 for (i=0; i<m; i++) { 3852 jend = ii[i+1] - ii[i]; 3853 olen = 0; 3854 dlen = 0; 3855 for (j=0; j<jend; j++) { 3856 if (*jj < rstart || *jj >= rend) olen++; 3857 else dlen++; 3858 jj++; 3859 } 3860 olens[i] = olen; 3861 dlens[i] = dlen; 3862 } 3863 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3864 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3865 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3866 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3867 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3868 ierr = PetscFree(dlens);CHKERRQ(ierr); 3869 } else { 3870 PetscInt ml,nl; 3871 3872 M = *newmat; 3873 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3874 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3875 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3876 /* 3877 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3878 rather than the slower MatSetValues(). 
3879 */ 3880 M->was_assembled = PETSC_TRUE; 3881 M->assembled = PETSC_FALSE; 3882 } 3883 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3884 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3885 ii = aij->i; 3886 jj = aij->j; 3887 aa = aij->a; 3888 for (i=0; i<m; i++) { 3889 row = rstart + i; 3890 nz = ii[i+1] - ii[i]; 3891 cwork = jj; jj += nz; 3892 vwork = aa; aa += nz; 3893 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3894 } 3895 3896 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3897 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3898 *newmat = M; 3899 3900 /* save submatrix used in processor for next request */ 3901 if (call == MAT_INITIAL_MATRIX) { 3902 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3903 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3904 } 3905 PetscFunctionReturn(0); 3906 } 3907 3908 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3909 { 3910 PetscInt m,cstart, cend,j,nnz,i,d; 3911 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3912 const PetscInt *JJ; 3913 PetscScalar *values; 3914 PetscErrorCode ierr; 3915 PetscBool nooffprocentries; 3916 3917 PetscFunctionBegin; 3918 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3919 3920 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3921 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3922 m = B->rmap->n; 3923 cstart = B->cmap->rstart; 3924 cend = B->cmap->rend; 3925 rstart = B->rmap->rstart; 3926 3927 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3928 3929 #if defined(PETSC_USE_DEBUG) 3930 for (i=0; i<m && Ii; i++) { 3931 nnz = Ii[i+1]- Ii[i]; 3932 JJ = J + Ii[i]; 3933 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3934 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row 
%D starts with negative column index",i,JJ[0]); 3935 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3936 } 3937 #endif 3938 3939 for (i=0; i<m && Ii; i++) { 3940 nnz = Ii[i+1]- Ii[i]; 3941 JJ = J + Ii[i]; 3942 nnz_max = PetscMax(nnz_max,nnz); 3943 d = 0; 3944 for (j=0; j<nnz; j++) { 3945 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3946 } 3947 d_nnz[i] = d; 3948 o_nnz[i] = nnz - d; 3949 } 3950 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3951 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3952 3953 if (v) values = (PetscScalar*)v; 3954 else { 3955 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3956 } 3957 3958 for (i=0; i<m && Ii; i++) { 3959 ii = i + rstart; 3960 nnz = Ii[i+1]- Ii[i]; 3961 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3962 } 3963 nooffprocentries = B->nooffprocentries; 3964 B->nooffprocentries = PETSC_TRUE; 3965 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3966 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3967 B->nooffprocentries = nooffprocentries; 3968 3969 if (!v) { 3970 ierr = PetscFree(values);CHKERRQ(ierr); 3971 } 3972 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3973 PetscFunctionReturn(0); 3974 } 3975 3976 /*@ 3977 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3978 (the default parallel PETSc format). 3979 3980 Collective on MPI_Comm 3981 3982 Input Parameters: 3983 + B - the matrix 3984 . i - the indices into j for the start of each local row (starts with zero) 3985 . 
j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ) */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extraction the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   the this processor, and c1-c2 is range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For e.g.: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering.. i.e for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the local CSR row-offset array must start at zero */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  /* the row distribution must be given explicitly; it cannot be derived from the CSR arrays */
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* the CSR arrays are copied into the matrix here (see Notes above) */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately, 4240 performance can be increased by more than a factor of 50. 4241 4242 Collective on MPI_Comm 4243 4244 Input Parameters: 4245 + comm - MPI communicator 4246 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4247 This value should be the same as the local size used in creating the 4248 y vector for the matrix-vector product y = Ax. 4249 . n - This value should be the same as the local size used in creating the 4250 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4251 calculated if N is given) For square matrices n is almost always m. 4252 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4253 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4254 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4255 (same value is used for all local rows) 4256 . d_nnz - array containing the number of nonzeros in the various rows of the 4257 DIAGONAL portion of the local submatrix (possibly different for each row) 4258 or NULL, if d_nz is used to specify the nonzero structure. 4259 The size of this array is equal to the number of local rows, i.e 'm'. 4260 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4261 submatrix (same value is used for all local rows). 4262 - o_nnz - array containing the number of nonzeros in the various rows of the 4263 OFF-DIAGONAL portion of the local submatrix (possibly different for 4264 each row) or NULL, if o_nz is used to specify the nonzero 4265 structure. The size of this array is equal to the number 4266 of local rows, i.e 'm'. 4267 4268 Output Parameter: 4269 . A - the matrix 4270 4271 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4272 MatXXXXSetPreallocation() paradgm instead of this routine directly. 
4273 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4274 4275 Notes: 4276 If the *_nnz parameter is given then the *_nz parameter is ignored 4277 4278 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4279 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4280 storage requirements for this matrix. 4281 4282 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4283 processor than it must be used on all processors that share the object for 4284 that argument. 4285 4286 The user MUST specify either the local or global matrix dimensions 4287 (possibly both). 4288 4289 The parallel matrix is partitioned across processors such that the 4290 first m0 rows belong to process 0, the next m1 rows belong to 4291 process 1, the next m2 rows belong to process 2 etc.. where 4292 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4293 values corresponding to [m x N] submatrix. 4294 4295 The columns are logically partitioned with the n0 columns belonging 4296 to 0th partition, the next n1 columns belonging to the next 4297 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4298 4299 The DIAGONAL portion of the local submatrix on any given processor 4300 is the submatrix corresponding to the rows and columns m,n 4301 corresponding to the given processor. i.e diagonal matrix on 4302 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4303 etc. The remaining portion of the local submatrix [m x (N-n)] 4304 constitute the OFF-DIAGONAL portion. The example below better 4305 illustrates this concept. 4306 4307 For a square global matrix we define each processor's diagonal portion 4308 to be its local rows and the corresponding columns (a square submatrix); 4309 each processor's off-diagonal portion encompasses the remainder of the 4310 local matrix (a rectangular submatrix). 4311 4312 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
4313 4314 When calling this routine with a single process communicator, a matrix of 4315 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4316 type of communicator, use the construction mechanism 4317 .vb 4318 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4319 .ve 4320 4321 $ MatCreate(...,&A); 4322 $ MatSetType(A,MATMPIAIJ); 4323 $ MatSetSizes(A, m,n,M,N); 4324 $ MatMPIAIJSetPreallocation(A,...); 4325 4326 By default, this format uses inodes (identical nodes) when possible. 4327 We search for consecutive rows with the same nonzero structure, thereby 4328 reusing matrix information to achieve increased efficiency. 4329 4330 Options Database Keys: 4331 + -mat_no_inode - Do not use inodes 4332 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4333 4334 4335 4336 Example usage: 4337 4338 Consider the following 8x8 matrix with 34 non-zero values, that is 4339 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4340 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4341 as follows 4342 4343 .vb 4344 1 2 0 | 0 3 0 | 0 4 4345 Proc0 0 5 6 | 7 0 0 | 8 0 4346 9 0 10 | 11 0 0 | 12 0 4347 ------------------------------------- 4348 13 0 14 | 15 16 17 | 0 0 4349 Proc1 0 18 0 | 19 20 21 | 0 0 4350 0 0 0 | 22 23 0 | 24 0 4351 ------------------------------------- 4352 Proc2 25 26 27 | 0 0 28 | 29 0 4353 30 0 0 | 31 32 33 | 0 34 4354 .ve 4355 4356 This can be represented as a collection of submatrices as 4357 4358 .vb 4359 A B C 4360 D E F 4361 G H I 4362 .ve 4363 4364 Where the submatrices A,B,C are owned by proc0, D,E,F are 4365 owned by proc1, G,H,I are owned by proc2. 4366 4367 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4368 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4369 The 'M','N' parameters are 8,8, and have the same values on all procs. 
   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e. 34, and
   hence pre-allocation is perfect.
   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    /* multi-process communicator: parallel AIJ, preallocated from the diagonal/off-diagonal parameters */
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* single-process communicator: a sequential AIJ matrix is returned (see Notes above) */
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Returns the diagonal block, the off-diagonal block, and the off-diagonal
   local-to-global column map of an MPIAIJ matrix (any may be requested as NULL). */
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and its subtypes via a type-name prefix match */
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  if (Ad)     *Ad     = a->A;       /* diagonal block */
  if (Ao)     *Ao     = a->B;       /* off-diagonal block */
  if (colmap) *colmap = a->garray;  /* global column indices of the off-diagonal block's columns */
  PetscFunctionReturn(0);
}

/* Stacks per-process sequential matrices (by rows) into a single parallel AIJ matrix. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr =
PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* first global row owned by this process = sum of the local row counts of lower ranks */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* count the diagonal/off-diagonal nonzeros of each local row for preallocation */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    /* MATAIJ resolves to SeqAIJ or MPIAIJ depending on the communicator size, so
       both preallocation routines are called; only the matching one takes effect */
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase: copy this process's rows of inmat into the parallel matrix */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Writes each process's local rows of A, as a sequential matrix, to its own
   binary file named "<outfile>.<rank>". */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
4504 char *name; 4505 Mat B; 4506 const PetscScalar *values; 4507 4508 PetscFunctionBegin; 4509 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4510 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4511 /* Should this be the type of the diagonal block of A? */ 4512 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4513 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4514 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4515 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4516 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4517 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4518 for (i=0; i<m; i++) { 4519 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4520 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4521 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4522 } 4523 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4524 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4525 4526 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4527 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4528 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4529 sprintf(name,"%s.%d",outfile,rank); 4530 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4531 ierr = PetscFree(name);CHKERRQ(ierr); 4532 ierr = MatView(B,out);CHKERRQ(ierr); 4533 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4534 ierr = MatDestroy(&B);CHKERRQ(ierr); 4535 PetscFunctionReturn(0); 4536 } 4537 4538 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4539 { 4540 PetscErrorCode ierr; 4541 Mat_Merge_SeqsToMPI *merge; 4542 PetscContainer container; 4543 4544 PetscFunctionBegin; 4545 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4546 if (container) { 4547 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4549 ierr = 
PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  /* chain to the regular MPIAIJ destructor */
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of summing per-process SeqAIJ matrices into the parallel matrix
   mpimat: exchanges the off-process matrix values with MPI and accumulates (adds)
   them with the local values, row by row. Requires the "MatMergeSeqsToMPI"
   container created by MatCreateMPIAIJSumSeqAIJSymbolic(). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode       ierr;
  MPI_Comm             comm;
  Mat_SeqAIJ           *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt             proc,m;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            *aa=a->a;
  MatScalar            **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the merge context attached by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* values of the rows owned by [proc] are stored contiguously in aa starting at ai[owners[proc]] */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* merge: aj is a subsequence of the sorted bj_i, so scan bj_i once */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of summing per-process SeqAIJ matrices: determines the nonzero
   structure of the parallel sum, preallocates the MPIAIJ matrix, and attaches the
   communication/merge context ("MatMergeSeqsToMPI") for reuse by the numeric phase. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request
*si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++; /* count only nonempty rows */
      }
      len_si[proc] = 2*(nrows+1); /* nrows count, then (row index, i-offset) pairs */
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
                 matrices from each processor
Collective on MPI_Comm 4944 4945 Input Parameters: 4946 + comm - the communicators the parallel matrix will live on 4947 . seqmat - the input sequential matrices 4948 . m - number of local rows (or PETSC_DECIDE) 4949 . n - number of local columns (or PETSC_DECIDE) 4950 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4951 4952 Output Parameter: 4953 . mpimat - the parallel matrix generated 4954 4955 Level: advanced 4956 4957 Notes: 4958 The dimensions of the sequential matrix in each processor MUST be the same. 4959 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4960 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4961 @*/ 4962 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4963 { 4964 PetscErrorCode ierr; 4965 PetscMPIInt size; 4966 4967 PetscFunctionBegin; 4968 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4969 if (size == 1) { 4970 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4971 if (scall == MAT_INITIAL_MATRIX) { 4972 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4973 } else { 4974 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4975 } 4976 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4977 PetscFunctionReturn(0); 4978 } 4979 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4980 if (scall == MAT_INITIAL_MATRIX) { 4981 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4982 } 4983 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4984 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4985 PetscFunctionReturn(0); 4986 } 4987 4988 /*@ 4989 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4990 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4991 with MatGetSize() 4992 4993 Not Collective 4994 4995 Input Parameters: 4996 + A - the matrix 4997 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4998 4999 Output Parameter: 5000 . A_loc - the local sequential matrix generated 5001 5002 Level: developer 5003 5004 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5005 5006 @*/ 5007 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5008 { 5009 PetscErrorCode ierr; 5010 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5011 Mat_SeqAIJ *mat,*a,*b; 5012 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5013 MatScalar *aa,*ba,*cam; 5014 PetscScalar *ca; 5015 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5016 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5017 PetscBool match; 5018 MPI_Comm comm; 5019 PetscMPIInt size; 5020 5021 PetscFunctionBegin; 5022 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5023 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5024 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5025 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5026 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5027 5028 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5029 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5030 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5031 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5032 aa = a->a; ba = b->a; 5033 if (scall == MAT_INITIAL_MATRIX) { 5034 if (size == 1) { 5035 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5036 PetscFunctionReturn(0); 5037 } 5038 5039 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5040 ci[0] = 0; 5041 for (i=0; i<am; i++) { 5042 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5043 } 5044 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5045 
ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5046 k = 0; 5047 for (i=0; i<am; i++) { 5048 ncols_o = bi[i+1] - bi[i]; 5049 ncols_d = ai[i+1] - ai[i]; 5050 /* off-diagonal portion of A */ 5051 for (jo=0; jo<ncols_o; jo++) { 5052 col = cmap[*bj]; 5053 if (col >= cstart) break; 5054 cj[k] = col; bj++; 5055 ca[k++] = *ba++; 5056 } 5057 /* diagonal portion of A */ 5058 for (j=0; j<ncols_d; j++) { 5059 cj[k] = cstart + *aj++; 5060 ca[k++] = *aa++; 5061 } 5062 /* off-diagonal portion of A */ 5063 for (j=jo; j<ncols_o; j++) { 5064 cj[k] = cmap[*bj++]; 5065 ca[k++] = *ba++; 5066 } 5067 } 5068 /* put together the new matrix */ 5069 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5070 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5071 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5072 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5073 mat->free_a = PETSC_TRUE; 5074 mat->free_ij = PETSC_TRUE; 5075 mat->nonew = 0; 5076 } else if (scall == MAT_REUSE_MATRIX) { 5077 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5078 ci = mat->i; cj = mat->j; cam = mat->a; 5079 for (i=0; i<am; i++) { 5080 /* off-diagonal portion of A */ 5081 ncols_o = bi[i+1] - bi[i]; 5082 for (jo=0; jo<ncols_o; jo++) { 5083 col = cmap[*bj]; 5084 if (col >= cstart) break; 5085 *cam++ = *ba++; bj++; 5086 } 5087 /* diagonal portion of A */ 5088 ncols_d = ai[i+1] - ai[i]; 5089 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5090 /* off-diagonal portion of A */ 5091 for (j=jo; j<ncols_o; j++) { 5092 *cam++ = *ba++; bj++; 5093 } 5094 } 5095 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5096 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5097 PetscFunctionReturn(0); 5098 } 5099 5100 /*@C 5101 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5102 5103 Not Collective 5104 5105 Input Parameters: 
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL for all local rows / all nonzero columns)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the columns with local nonzeros, in ascending global order.
       garray is sorted, so off-diagonal columns below cstart come first, then the
       owned (diagonal-block) columns, then the off-diagonal columns above cend */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;  /* first off-diagonal column at or beyond the owned range */
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    /* PETSC_OWN_POINTER: the IS takes over idx, no free here */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  /* only destroy the index sets we created ourselves */
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* A*B requires the column layout of A to match the row layout of B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* rows of B to fetch = nonzero columns of local A, assembled in ascending global order
       (see the same three-segment construction in MatMPIAIJGetLocalMatCondensed()) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    /* take every column of B */
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* hand the index sets back to the caller when requested, otherwise destroy them */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5263 5264 Level: developer 5265 5266 */ 5267 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5268 { 5269 PetscErrorCode ierr; 5270 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5271 Mat_SeqAIJ *b_oth; 5272 VecScatter ctx; 5273 MPI_Comm comm; 5274 const PetscMPIInt *rprocs,*sprocs; 5275 const PetscInt *srow,*rstarts,*sstarts; 5276 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5277 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5278 PetscScalar *b_otha,*bufa,*bufA,*vals; 5279 MPI_Request *rwaits = NULL,*swaits = NULL; 5280 MPI_Status rstatus; 5281 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5282 5283 PetscFunctionBegin; 5284 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5285 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5286 5287 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5288 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5289 } 5290 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5291 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5292 5293 if (size == 1) { 5294 startsj_s = NULL; 5295 bufa_ptr = NULL; 5296 *B_oth = NULL; 5297 PetscFunctionReturn(0); 5298 } 5299 5300 ctx = a->Mvctx; 5301 tag = ((PetscObject)ctx)->tag; 5302 5303 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5304 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5305 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5306 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not 
needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5307 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5308 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5309 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5310 5311 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5312 if (scall == MAT_INITIAL_MATRIX) { 5313 /* i-array */ 5314 /*---------*/ 5315 /* post receives */ 5316 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5317 for (i=0; i<nrecvs; i++) { 5318 rowlen = rvalues + rstarts[i]*rbs; 5319 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5320 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5321 } 5322 5323 /* pack the outgoing message */ 5324 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5325 5326 sstartsj[0] = 0; 5327 rstartsj[0] = 0; 5328 len = 0; /* total length of j or a array to be sent */ 5329 if (nsends) { 5330 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5331 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5332 } 5333 for (i=0; i<nsends; i++) { 5334 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5335 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5336 for (j=0; j<nrows; j++) { 5337 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5338 for (l=0; l<sbs; l++) { 5339 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5340 5341 rowlen[j*sbs+l] = ncols; 5342 5343 len += ncols; 5344 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5345 } 5346 k++; 5347 } 5348 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5349 5350 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5351 } 5352 /* recvs and sends of i-array are completed */ 5353 i = nrecvs; 5354 
while (i--) { 5355 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5356 } 5357 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5358 ierr = PetscFree(svalues);CHKERRQ(ierr); 5359 5360 /* allocate buffers for sending j and a arrays */ 5361 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5362 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5363 5364 /* create i-array of B_oth */ 5365 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5366 5367 b_othi[0] = 0; 5368 len = 0; /* total length of j or a array to be received */ 5369 k = 0; 5370 for (i=0; i<nrecvs; i++) { 5371 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5372 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5373 for (j=0; j<nrows; j++) { 5374 b_othi[k+1] = b_othi[k] + rowlen[j]; 5375 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5376 k++; 5377 } 5378 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5379 } 5380 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5381 5382 /* allocate space for j and a arrrays of B_oth */ 5383 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5384 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5385 5386 /* j-array */ 5387 /*---------*/ 5388 /* post receives of j-array */ 5389 for (i=0; i<nrecvs; i++) { 5390 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5391 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5392 } 5393 5394 /* pack the outgoing message j-array */ 5395 if (nsends) k = sstarts[0]; 5396 for (i=0; i<nsends; i++) { 5397 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5398 bufJ = bufj+sstartsj[i]; 5399 for (j=0; j<nrows; j++) { 5400 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5401 for (ll=0; ll<sbs; ll++) { 5402 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5403 for (l=0; l<ncols; l++) { 5404 *bufJ++ = cols[l]; 5405 } 
5406 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5407 } 5408 } 5409 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5410 } 5411 5412 /* recvs and sends of j-array are completed */ 5413 i = nrecvs; 5414 while (i--) { 5415 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5416 } 5417 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5418 } else if (scall == MAT_REUSE_MATRIX) { 5419 sstartsj = *startsj_s; 5420 rstartsj = *startsj_r; 5421 bufa = *bufa_ptr; 5422 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5423 b_otha = b_oth->a; 5424 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5425 5426 /* a-array */ 5427 /*---------*/ 5428 /* post receives of a-array */ 5429 for (i=0; i<nrecvs; i++) { 5430 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5431 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5432 } 5433 5434 /* pack the outgoing message a-array */ 5435 if (nsends) k = sstarts[0]; 5436 for (i=0; i<nsends; i++) { 5437 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5438 bufA = bufa+sstartsj[i]; 5439 for (j=0; j<nrows; j++) { 5440 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5441 for (ll=0; ll<sbs; ll++) { 5442 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5443 for (l=0; l<ncols; l++) { 5444 *bufA++ = vals[l]; 5445 } 5446 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5447 } 5448 } 5449 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5450 } 5451 /* recvs and sends of a-array are completed */ 5452 i = nrecvs; 5453 while (i--) { 5454 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5455 } 5456 if (nsends) {ierr = 
MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5457 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5458 5459 if (scall == MAT_INITIAL_MATRIX) { 5460 /* put together the new matrix */ 5461 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5462 5463 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5464 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5465 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5466 b_oth->free_a = PETSC_TRUE; 5467 b_oth->free_ij = PETSC_TRUE; 5468 b_oth->nonew = 0; 5469 5470 ierr = PetscFree(bufj);CHKERRQ(ierr); 5471 if (!startsj_s || !bufa_ptr) { 5472 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5473 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5474 } else { 5475 *startsj_s = sstartsj; 5476 *startsj_r = rstartsj; 5477 *bufa_ptr = bufa; 5478 } 5479 } 5480 5481 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5482 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5483 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5484 PetscFunctionReturn(0); 5485 } 5486 5487 /*@C 5488 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5489 5490 Not Collective 5491 5492 Input Parameters: 5493 . A - The matrix in mpiaij format 5494 5495 Output Parameter: 5496 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5497 . 
 colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* NOTE(review): the PetscValidPointer() checks below error out on NULL arguments,
     which makes the if (lvec)/if (colmap)/if (multScatter) guards that follow dead
     code -- either the validation or the guards is redundant; confirm intent */
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

/* conversion and product routines implemented in other translation units */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
/*
    Computes (B'*A')' since computing B*A directly is untenable

           n                     p                      p
       (       )             (       )              (       )
     m (   A   )    *     n  (   B   )     =     m  (   C   )
       (       )             (       )              (       )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  /* form C = (B' * A')' via explicit transposes; the temporaries are destroyed as soon as possible */
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  /* transpose the product back into the caller-provided C */
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase: creates the MPIDENSE product matrix C with the proper layout;
   the numeric kernel above is installed in C's function table */
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
/* Driver: symbolic phase only on MAT_INITIAL_MATRIX, numeric phase always */
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/

/* Type constructor: installs the MPIAIJ function table and registers the
   composed operations (conversions, products, preallocation hooks) */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.
i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* wrap the caller's arrays directly as the diagonal (A) and off-diagonal (B) blocks;
     no copies are made, so the caller retains ownership (see Notes above) */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* only local entries were provided, so suppress any off-process communication
     during the parallel assembly, then restore the option */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr =
MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5766 PetscFunctionReturn(0); 5767 } 5768 5769 /* 5770 Special version for direct calls from Fortran 5771 */ 5772 #include <petsc/private/fortranimpl.h> 5773 5774 /* Change these macros so can be used in void function */ 5775 #undef CHKERRQ 5776 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5777 #undef SETERRQ2 5778 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5779 #undef SETERRQ3 5780 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5781 #undef SETERRQ 5782 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5783 5784 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5785 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5786 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5787 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5788 #else 5789 #endif 5790 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5791 { 5792 Mat mat = *mmat; 5793 PetscInt m = *mm, n = *mn; 5794 InsertMode addv = *maddv; 5795 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5796 PetscScalar value; 5797 PetscErrorCode ierr; 5798 5799 MatCheckPreallocated(mat,1); 5800 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5801 5802 #if defined(PETSC_USE_DEBUG) 5803 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5804 #endif 5805 { 5806 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5807 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5808 PetscBool roworiented = aij->roworiented; 5809 5810 /* Some Variables required in the macro */ 5811 Mat A = aij->A; 5812 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5813 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5814 MatScalar *aa = a->a; 5815 PetscBool ignorezeroentries = 
(((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5816 Mat B = aij->B; 5817 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5818 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5819 MatScalar *ba = b->a; 5820 5821 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5822 PetscInt nonew = a->nonew; 5823 MatScalar *ap1,*ap2; 5824 5825 PetscFunctionBegin; 5826 for (i=0; i<m; i++) { 5827 if (im[i] < 0) continue; 5828 #if defined(PETSC_USE_DEBUG) 5829 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5830 #endif 5831 if (im[i] >= rstart && im[i] < rend) { 5832 row = im[i] - rstart; 5833 lastcol1 = -1; 5834 rp1 = aj + ai[row]; 5835 ap1 = aa + ai[row]; 5836 rmax1 = aimax[row]; 5837 nrow1 = ailen[row]; 5838 low1 = 0; 5839 high1 = nrow1; 5840 lastcol2 = -1; 5841 rp2 = bj + bi[row]; 5842 ap2 = ba + bi[row]; 5843 rmax2 = bimax[row]; 5844 nrow2 = bilen[row]; 5845 low2 = 0; 5846 high2 = nrow2; 5847 5848 for (j=0; j<n; j++) { 5849 if (roworiented) value = v[i*n+j]; 5850 else value = v[i+j*m]; 5851 if (in[j] >= cstart && in[j] < cend) { 5852 col = in[j] - cstart; 5853 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5854 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5855 } else if (in[j] < 0) continue; 5856 #if defined(PETSC_USE_DEBUG) 5857 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5858 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5859 #endif 5860 else { 5861 if (mat->was_assembled) { 5862 if (!aij->colmap) { 5863 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5864 } 5865 #if defined(PETSC_USE_CTABLE) 5866 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 
5867 col--; 5868 #else 5869 col = aij->colmap[in[j]] - 1; 5870 #endif 5871 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5872 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5873 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5874 col = in[j]; 5875 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5876 B = aij->B; 5877 b = (Mat_SeqAIJ*)B->data; 5878 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5879 rp2 = bj + bi[row]; 5880 ap2 = ba + bi[row]; 5881 rmax2 = bimax[row]; 5882 nrow2 = bilen[row]; 5883 low2 = 0; 5884 high2 = nrow2; 5885 bm = aij->B->rmap->n; 5886 ba = b->a; 5887 } 5888 } else col = in[j]; 5889 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5890 } 5891 } 5892 } else if (!aij->donotstash) { 5893 if (roworiented) { 5894 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5895 } else { 5896 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5897 } 5898 } 5899 } 5900 } 5901 PetscFunctionReturnVoid(); 5902 } 5903