1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 52 PetscFunctionBegin; 53 if (mat->A) { 54 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 55 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 61 { 62 PetscErrorCode ierr; 63 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 64 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 65 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 66 const PetscInt *ia,*ib; 67 const MatScalar *aa,*bb; 68 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 69 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 70 71 PetscFunctionBegin; 72 *keptrows = 0; 73 ia = a->i; 74 ib = b->i; 75 for (i=0; i<m; i++) { 76 na = ia[i+1] - ia[i]; 77 nb = ib[i+1] - ib[i]; 78 if (!na && !nb) { 79 cnt++; 80 goto ok1; 81 } 82 aa = a->a + ia[i]; 83 for (j=0; j<na; j++) { 84 if (aa[j] != 0.0) goto ok1; 85 } 86 bb = b->a + ib[i]; 87 for (j=0; j <nb; j++) { 88 if (bb[j] != 0.0) goto ok1; 89 } 90 cnt++; 91 ok1:; 92 } 93 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 94 if (!n0rows) PetscFunctionReturn(0); 95 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 96 cnt = 0; 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) continue; 101 aa = a->a + ia[i]; 102 for (j=0; j<na;j++) { 103 if (aa[j] != 0.0) { 104 rows[cnt++] = rstart + i; 105 goto ok2; 106 } 107 } 108 bb = b->a + ib[i]; 109 for (j=0; j<nb; j++) { 110 if (bb[j] != 0.0) { 111 rows[cnt++] = rstart + i; 112 goto 
ok2; 113 } 114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr 
= ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 
385 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 386 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 387 } 388 i--; 389 if (mat->rmap->n) { 390 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 391 } 392 if (rank) { 393 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 394 } 395 } 396 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 Local utility routine that creates a mapping from the global column 403 number to the local number in the off-diagonal part of the local 404 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 405 a slightly higher hash table cost; without it it is not scalable (each processor 406 has an order N integer array but is fast to acess. 407 */ 408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt n = aij->B->cmap->n,i; 413 414 PetscFunctionBegin; 415 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 416 #if defined(PETSC_USE_CTABLE) 417 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 418 for (i=0; i<n; i++) { 419 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 #else 422 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 423 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 424 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 430 { \ 431 if (col <= lastcol1) low1 = 0; \ 432 else high1 = nrow1; \ 433 lastcol1 = col;\ 434 while (high1-low1 > 5) { \ 435 t = (low1+high1)/2; \ 436 if (rp1[t] > col) high1 = t; \ 437 else low1 = t; \ 438 } \ 439 for (_i=low1; _i<high1; _i++) { \ 440 if (rp1[_i] > col) break; \ 441 if (rp1[_i] == col) { \ 442 if (addv == ADD_VALUES) ap1[_i] += value; \ 443 else ap1[_i] = value; \ 444 goto a_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 448 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 449 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 450 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 451 N = nrow1++ - 1; a->nz++; high1++; \ 452 /* shift up all the later entries in this row */ \ 453 for (ii=N; ii>=_i; ii--) { \ 454 rp1[ii+1] = rp1[ii]; \ 455 ap1[ii+1] = ap1[ii]; \ 456 } \ 457 rp1[_i] = col; \ 458 ap1[_i] = value; \ 459 A->nonzerostate++;\ 460 a_noinsert: ; \ 461 ailen[row] = nrow1; \ 462 } 463 464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 465 { \ 466 if (col <= lastcol2) low2 = 0; \ 467 else high2 = nrow2; \ 468 lastcol2 = col; \ 469 while (high2-low2 > 5) { \ 470 t = (low2+high2)/2; \ 471 if (rp2[t] > col) high2 = t; \ 472 else low2 = t; \ 473 } \ 474 for (_i=low2; _i<high2; _i++) { \ 475 if (rp2[_i] > col) break; \ 476 if (rp2[_i] == col) { \ 477 if (addv == ADD_VALUES) ap2[_i] += value; \ 478 else ap2[_i] 
= value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 
575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 637 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 638 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
639 */ 640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 641 { 642 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 643 Mat A = aij->A; /* diagonal part of the matrix */ 644 Mat B = aij->B; /* offdiagonal part of the matrix */ 645 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 646 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 647 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 648 PetscInt *ailen = a->ilen,*aj = a->j; 649 PetscInt *bilen = b->ilen,*bj = b->j; 650 PetscInt am = aij->A->rmap->n,j; 651 PetscInt diag_so_far = 0,dnz; 652 PetscInt offd_so_far = 0,onz; 653 654 PetscFunctionBegin; 655 /* Iterate over all rows of the matrix */ 656 for (j=0; j<am; j++) { 657 dnz = onz = 0; 658 /* Iterate over all non-zero columns of the current row */ 659 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 660 /* If column is in the diagonal */ 661 if (mat_j[col] >= cstart && mat_j[col] < cend) { 662 aj[diag_so_far++] = mat_j[col] - cstart; 663 dnz++; 664 } else { /* off-diagonal entries */ 665 bj[offd_so_far++] = mat_j[col]; 666 onz++; 667 } 668 } 669 ailen[j] = dnz; 670 bilen[j] = onz; 671 } 672 PetscFunctionReturn(0); 673 } 674 675 /* 676 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 677 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 678 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 679 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 680 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 681 */ 682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 Mat A = aij->A; /* diagonal part of the matrix */ 686 Mat B = aij->B; /* offdiagonal part of the matrix */ 687 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 689 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 690 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 691 PetscInt *ailen = a->ilen,*aj = a->j; 692 PetscInt *bilen = b->ilen,*bj = b->j; 693 PetscInt am = aij->A->rmap->n,j; 694 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 695 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 696 PetscScalar *aa = a->a,*ba = b->a; 697 698 PetscFunctionBegin; 699 /* Iterate over all rows of the matrix */ 700 for (j=0; j<am; j++) { 701 dnz_row = onz_row = 0; 702 rowstart_offd = full_offd_i[j]; 703 rowstart_diag = full_diag_i[j]; 704 /* Iterate over all non-zero columns of the current row */ 705 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 706 /* If column is in the diagonal */ 707 if (mat_j[col] >= cstart && mat_j[col] < cend) { 708 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 709 aa[rowstart_diag+dnz_row] = mat_a[col]; 710 dnz_row++; 711 } else { /* off-diagonal entries */ 712 bj[rowstart_offd+onz_row] = mat_j[col]; 713 ba[rowstart_offd+onz_row] = mat_a[col]; 714 onz_row++; 715 } 716 } 717 ailen[j] = dnz_row; 718 bilen[j] = onz_row; 719 } 720 PetscFunctionReturn(0); 721 } 722 723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 724 { 725 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 726 PetscErrorCode ierr; 727 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 728 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 729 730 PetscFunctionBegin; 731 for (i=0; i<m; i++) { 732 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 733 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 734 if (idxm[i] >= rstart && idxm[i] < rend) { 735 row = idxm[i] - rstart; 736 for (j=0; j<n; j++) { 737 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 738 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 739 if (idxn[j] >= cstart && idxn[j] < cend) { 740 col = idxn[j] - cstart; 741 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 742 } else { 743 if (!aij->colmap) { 744 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 745 } 746 #if defined(PETSC_USE_CTABLE) 747 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 753 else { 754 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 755 } 756 } 757 } 758 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 764 765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 766 { 767 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 768 PetscErrorCode ierr; 769 PetscInt nstash,reallocs; 770 771 PetscFunctionBegin; 772 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 773 774 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 775 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 776 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 777 PetscFunctionReturn(0); 778 } 779 780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 781 { 782 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 783 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 784 PetscErrorCode ierr; 785 PetscMPIInt n; 786 PetscInt i,j,rstart,ncols,flg; 787 PetscInt *row,*col; 788 
PetscBool other_disassembled; 789 PetscScalar *val; 790 791 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 792 793 PetscFunctionBegin; 794 if (!aij->donotstash && !mat->nooffprocentries) { 795 while (1) { 796 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 797 if (!flg) break; 798 799 for (i=0; i<n; ) { 800 /* Now identify the consecutive vals belonging to the same row */ 801 for (j=i,rstart=row[j]; j<n; j++) { 802 if (row[j] != rstart) break; 803 } 804 if (j < n) ncols = j-i; 805 else ncols = n-i; 806 /* Now assemble all these values with a single function call */ 807 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 808 809 i = j; 810 } 811 } 812 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 813 } 814 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 815 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 816 817 /* determine if any processor has disassembled, if so we must 818 also disassemble ourselfs, in order that we may reassemble. */ 819 /* 820 if nonzero structure of submatrix B cannot change then we know that 821 no processor disassembled thus we can skip this stuff 822 */ 823 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 824 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 825 if (mat->was_assembled && !other_disassembled) { 826 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 827 } 828 } 829 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 830 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 831 } 832 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 833 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 835 836 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 837 838 aij->rowvalues = 0; 839 840 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 841 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 842 843 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 844 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 845 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 846 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 847 } 848 PetscFunctionReturn(0); 849 } 850 851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 852 { 853 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 854 PetscErrorCode ierr; 855 856 PetscFunctionBegin; 857 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 858 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 863 { 864 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 865 PetscInt *lrows; 866 PetscInt r, len; 867 PetscBool cong; 868 PetscErrorCode ierr; 869 870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 873 /* fix right hand side if needed */ 874 if (x && b) { 875 const PetscScalar *xx; 876 PetscScalar *bb; 877 878 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 879 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 880 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 881 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 882 ierr = 
VecRestoreArray(b, &bb);CHKERRQ(ierr); 883 } 884 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 885 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 886 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 887 if ((diag != 0.0) && cong) { 888 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 889 } else if (diag != 0.0) { 890 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 891 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 892 for (r = 0; r < len; ++r) { 893 const PetscInt row = lrows[r] + A->rmap->rstart; 894 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 895 } 896 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 898 } else { 899 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 900 } 901 ierr = PetscFree(lrows);CHKERRQ(ierr); 902 903 /* only change matrix nonzero state if pattern was allowed to be changed */ 904 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 905 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 906 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 907 } 908 PetscFunctionReturn(0); 909 } 910 911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 912 { 913 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 914 PetscErrorCode ierr; 915 PetscMPIInt n = A->rmap->n; 916 PetscInt i,j,r,m,p = 0,len = 0; 917 PetscInt *lrows,*owners = A->rmap->range; 918 PetscSFNode *rrows; 919 PetscSF sf; 920 const PetscScalar *xx; 921 PetscScalar *bb,*mask; 922 Vec xmask,lmask; 923 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 924 const PetscInt *aj, *ii,*ridx; 925 PetscScalar *aa; 926 927 PetscFunctionBegin; 928 /* Create SF where leaves are input rows and roots are owned rows */ 929 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 930 for (r = 0; r < n; ++r) lrows[r] = -1; 931 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 932 for (r = 0; r < N; ++r) { 933 const PetscInt idx = rows[r]; 934 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 935 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 936 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 937 } 938 rrows[r].rank = p; 939 rrows[r].index = rows[r] - owners[p]; 940 } 941 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 942 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 943 /* Collect flags for rows to be zeroed */ 944 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 945 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 946 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 947 /* Compress and put in row numbers */ 948 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 949 /* zero diagonal part of matrix */ 950 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 951 /* handle off diagonal part of matrix */ 952 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 953 ierr 
= VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 954 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 955 for (i=0; i<len; i++) bb[lrows[i]] = 1; 956 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 957 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 958 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 960 if (x) { 961 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 964 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 965 } 966 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 967 /* remove zeroed rows of off diagonal matrix */ 968 ii = aij->i; 969 for (i=0; i<len; i++) { 970 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 971 } 972 /* loop over all elements of off process part of matrix zeroing removed columns*/ 973 if (aij->compressedrow.use) { 974 m = aij->compressedrow.nrows; 975 ii = aij->compressedrow.i; 976 ridx = aij->compressedrow.rindex; 977 for (i=0; i<m; i++) { 978 n = ii[i+1] - ii[i]; 979 aj = aij->j + ii[i]; 980 aa = aij->a + ii[i]; 981 982 for (j=0; j<n; j++) { 983 if (PetscAbsScalar(mask[*aj])) { 984 if (b) bb[*ridx] -= *aa*xx[*aj]; 985 *aa = 0.0; 986 } 987 aa++; 988 aj++; 989 } 990 ridx++; 991 } 992 } else { /* do not use compressed row format */ 993 m = l->B->rmap->n; 994 for (i=0; i<m; i++) { 995 n = ii[i+1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij->a + ii[i]; 998 for (j=0; j<n; j++) { 999 if (PetscAbsScalar(mask[*aj])) { 1000 if (b) bb[i] -= *aa*xx[*aj]; 1001 *aa = 0.0; 1002 } 1003 aa++; 1004 aj++; 1005 } 1006 } 1007 } 1008 if (x) { 1009 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1010 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1011 } 1012 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1013 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1014 ierr = PetscFree(lrows);CHKERRQ(ierr); 1015 1016 /* only change matrix nonzero state if pattern was allowed to be changed */ 1017 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1018 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1019 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1025 { 1026 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1027 PetscErrorCode ierr; 1028 PetscInt nt; 1029 VecScatter Mvctx = a->Mvctx; 1030 1031 PetscFunctionBegin; 1032 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1033 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1034 1035 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat 
A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1060 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1062 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1063 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 PetscBool merged; 1072 1073 PetscFunctionBegin; 1074 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1075 /* do nondiagonal part */ 1076 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1077 if (!merged) { 1078 /* send it on its way */ 1079 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1080 /* do local part */ 1081 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1082 /* receive remote parts: note this assumes the values are not actually */ 1083 /* added in yy until the next line, */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 } else { 1086 /* do local part */ 1087 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1088 /* send it on its way */ 1089 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1090 /* values actually were received in the Begin() but we need to call this nop */ 1091 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1097 { 1098 MPI_Comm comm; 1099 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1100 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1101 IS Me,Notme; 1102 PetscErrorCode ierr; 1103 PetscInt M,N,first,last,*notme,i; 1104 PetscBool lf; 1105 PetscMPIInt size; 1106 1107 PetscFunctionBegin; 1108 /* Easy test: symmetric diagonal block */ 1109 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1110 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1111 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1112 if (!*f) PetscFunctionReturn(0); 1113 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1114 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1115 if (size == 1) PetscFunctionReturn(0); 1116 1117 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1118 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1119 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1120 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1121 for (i=0; i<first; i++) notme[i] = i; 1122 for (i=last; i<M; i++) notme[i-last+first] = i; 1123 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1124 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1125 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1126 Aoff = Aoffs[0]; 1127 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1128 Boff = Boffs[0]; 1129 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1130 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1131 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1132 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1133 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1134 ierr = PetscFree(notme);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1139 { 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 /* do nondiagonal part */ 1154 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1155 /* send it on its way */ 1156 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 /* do local part */ 1158 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1159 /* receive remote parts */ 1160 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 /* 1165 This only works correctly for square matrices where the subblock A->A is the 1166 diagonal block 1167 */ 1168 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1169 { 1170 PetscErrorCode ierr; 1171 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1172 1173 PetscFunctionBegin; 1174 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1175 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1176 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1181 { 1182 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1183 PetscErrorCode ierr; 1184 1185 PetscFunctionBegin; 1186 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1187 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1192 { 1193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1194 PetscErrorCode ierr; 1195 1196 PetscFunctionBegin; 1197 #if defined(PETSC_USE_LOG) 1198 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1199 #endif 1200 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1201 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1202 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1203 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1204 #if defined(PETSC_USE_CTABLE) 1205 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1206 #else 1207 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1208 #endif 1209 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1210 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1211 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1212 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1213 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1214 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1215 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1216 1217 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1226 #if defined(PETSC_HAVE_ELEMENTAL) 1227 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1228 #endif 1229 #if defined(PETSC_HAVE_HYPRE) 1230 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1232 #endif 1233 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1239 { 1240 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1241 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1242 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1243 PetscErrorCode ierr; 1244 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1245 int fd; 1246 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1247 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1248 PetscScalar *column_values; 1249 PetscInt message_count,flowcontrolcount; 1250 FILE *file; 1251 1252 PetscFunctionBegin; 1253 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1254 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1255 nz = A->nz + B->nz; 1256 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1257 if (!rank) { 1258 header[0] = MAT_FILE_CLASSID; 1259 header[1] = mat->rmap->N; 1260 header[2] = mat->cmap->N; 1261 1262 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1263 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1264 /* get largest number of rows any processor has */ 1265 rlen = mat->rmap->n; 1266 range = mat->rmap->range; 1267 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1268 } else { 1269 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1270 rlen = mat->rmap->n; 1271 } 1272 1273 /* 
load up the local row counts */ 1274 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1275 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1276 1277 /* store the row lengths to the file */ 1278 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1279 if (!rank) { 1280 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1281 for (i=1; i<size; i++) { 1282 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1283 rlen = range[i+1] - range[i]; 1284 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1285 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1286 } 1287 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1288 } else { 1289 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1290 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1291 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1292 } 1293 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1294 1295 /* load up the local column indices */ 1296 nzmax = nz; /* th processor needs space a largest processor needs */ 1297 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1298 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1299 cnt = 0; 1300 for (i=0; i<mat->rmap->n; i++) { 1301 for (j=B->i[i]; j<B->i[i+1]; j++) { 1302 if ((col = garray[B->j[j]]) > cstart) break; 1303 column_indices[cnt++] = col; 1304 } 1305 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1306 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1307 } 1308 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1309 1310 /* store the column indices to the file */ 1311 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1312 if (!rank) { 1313 MPI_Status status; 1314 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1315 for (i=1; i<size; i++) { 1316 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1317 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1318 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1319 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1320 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1321 } 1322 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1323 } else { 1324 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1325 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1326 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1327 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1328 } 1329 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1330 1331 /* load up the local column values */ 1332 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1333 cnt = 0; 1334 for (i=0; i<mat->rmap->n; i++) { 1335 for 
(j=B->i[i]; j<B->i[i+1]; j++) { 1336 if (garray[B->j[j]] > cstart) break; 1337 column_values[cnt++] = B->a[j]; 1338 } 1339 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1340 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1341 } 1342 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1343 1344 /* store the column values to the file */ 1345 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1346 if (!rank) { 1347 MPI_Status status; 1348 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1349 for (i=1; i<size; i++) { 1350 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1351 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1352 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1353 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1354 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1355 } 1356 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1357 } else { 1358 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1359 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1360 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1361 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1362 } 1363 ierr = PetscFree(column_values);CHKERRQ(ierr); 1364 1365 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1366 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #include <petscdraw.h> 1371 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1372 { 1373 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1374 PetscErrorCode ierr; 1375 PetscMPIInt rank = aij->rank,size = aij->size; 1376 PetscBool isdraw,iascii,isbinary; 1377 PetscViewer sviewer; 1378 PetscViewerFormat format; 1379 1380 PetscFunctionBegin; 1381 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1382 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1383 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1384 if (iascii) { 1385 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1386 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1387 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1388 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1389 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1390 for (i=0; i<(PetscInt)size; i++) { 1391 nmax = PetscMax(nmax,nz[i]); 1392 nmin = PetscMin(nmin,nz[i]); 1393 navg += nz[i]; 1394 } 1395 ierr = PetscFree(nz);CHKERRQ(ierr); 1396 navg = navg/size; 1397 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1401 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1402 MatInfo info; 1403 
PetscBool inodes; 1404 1405 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1406 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1407 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1408 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1409 if (!inodes) { 1410 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1411 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1412 } else { 1413 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1414 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1415 } 1416 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1418 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1420 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1421 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1422 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1423 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1426 PetscInt inodecount,inodelimit,*inodes; 1427 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1428 if (inodes) { 1429 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1430 } else { 1431 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1432 } 1433 PetscFunctionReturn(0); 1434 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1435 PetscFunctionReturn(0); 1436 } 1437 } else if (isbinary) { 1438 if (size == 1) { 1439 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1440 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1441 } else { 1442 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } else if (isdraw) { 1446 PetscDraw draw; 1447 PetscBool isnull; 1448 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1449 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1450 if (isnull) PetscFunctionReturn(0); 1451 } 1452 1453 { 1454 /* assemble the entire matrix onto first processor. 
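         This path serves the ASCII, draw and socket viewers only and is not scalable: process 0
         temporarily holds a copy of the entire matrix, so it is only suitable when the matrix is
         small enough to fit in a single process's memory (debugging and small runs).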
*/ 1455 Mat A; 1456 Mat_SeqAIJ *Aloc; 1457 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1458 MatScalar *a; 1459 1460 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1461 if (!rank) { 1462 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1463 } else { 1464 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1465 } 1466 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1467 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1468 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1469 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1470 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1471 1472 /* copy over the A part */ 1473 Aloc = (Mat_SeqAIJ*)aij->A->data; 1474 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1475 row = mat->rmap->rstart; 1476 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1477 for (i=0; i<m; i++) { 1478 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1479 row++; 1480 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1481 } 1482 aj = Aloc->j; 1483 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1484 1485 /* copy over the B part */ 1486 Aloc = (Mat_SeqAIJ*)aij->B->data; 1487 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1488 row = mat->rmap->rstart; 1489 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1490 ct = cols; 1491 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1492 for (i=0; i<m; i++) { 1493 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1494 row++; 1495 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1496 } 1497 ierr = PetscFree(ct);CHKERRQ(ierr); 1498 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1499 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1500 /* 1501 Everyone has to call to draw the matrix since the graphics waits are 1502 synchronized across all processors that share the PetscDraw object 1503 */ 1504 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 if (!rank) { 1506 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1507 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1508 } 1509 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1510 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1511 ierr = MatDestroy(&A);CHKERRQ(ierr); 1512 } 1513 PetscFunctionReturn(0); 1514 } 1515 1516 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1517 { 1518 PetscErrorCode ierr; 1519 PetscBool iascii,isdraw,issocket,isbinary; 1520 1521 PetscFunctionBegin; 1522 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1523 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1524 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1525 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1526 if (iascii || isdraw || isbinary || issocket) { 1527 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1528 } 1529 PetscFunctionReturn(0); 1530 } 1531 1532 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1533 { 1534 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1535 PetscErrorCode ierr; 1536 Vec bb1 = 0; 1537 PetscBool hasop; 1538 
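  /*
     The SOR_LOCAL_* variants below implement processor-block SOR: in each outer iteration the
     current solution xx is scattered into mat->lvec (the ghost values), the right-hand side is
     updated as bb1 = bb - B*lvec, and an SOR sweep is applied only to the local diagonal block A.
     Illustrative usage sketch (editor's addition): this code is typically reached through PCSOR,
     for example with options such as

         -ksp_type richardson -pc_type sor -pc_sor_local_symmetric
  */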
1539 PetscFunctionBegin; 1540 if (flag == SOR_APPLY_UPPER) { 1541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1546 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1547 } 1548 1549 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1550 if (flag & SOR_ZERO_INITIAL_GUESS) { 1551 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1552 its--; 1553 } 1554 1555 while (its--) { 1556 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1557 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 1559 /* update rhs: bb1 = bb - B*x */ 1560 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1561 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1562 1563 /* local sweep */ 1564 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1565 } 1566 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1567 if (flag & SOR_ZERO_INITIAL_GUESS) { 1568 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1569 its--; 1570 } 1571 while (its--) { 1572 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 1575 /* update rhs: bb1 = bb - B*x */ 1576 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1577 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1578 1579 /* local sweep */ 1580 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1581 } 1582 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1583 if (flag & SOR_ZERO_INITIAL_GUESS) { 1584 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1585 its--; 1586 } 1587 while (its--) { 1588 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1589 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 1591 /* update rhs: bb1 = bb - B*x */ 1592 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1593 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1594 1595 /* local sweep */ 1596 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1597 } 1598 } else if (flag & SOR_EISENSTAT) { 1599 Vec xx1; 1600 1601 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1602 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1603 1604 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 if (!mat->diag) { 1607 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1608 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1609 } 1610 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1611 if (hasop) { 1612 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1613 } else { 1614 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1615 } 1616 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1617 1618 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1619 1620 /* local sweep */ 1621 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1622 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1623 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1624 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1625 1626 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1627 1628 matin->factorerrortype = mat->A->factorerrortype; 1629 PetscFunctionReturn(0); 1630 } 1631 1632 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1633 { 1634 Mat aA,aB,Aperm; 1635 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1636 PetscScalar *aa,*ba; 1637 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1638 PetscSF rowsf,sf; 1639 IS parcolp = NULL; 1640 PetscBool done; 1641 PetscErrorCode ierr; 1642 1643 PetscFunctionBegin; 1644 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1645 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1646 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1647 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1648 1649 /* Invert row permutation to find out where my rows should go */ 1650 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1651 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1652 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1653 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1654 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1655 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1656 1657 /* Invert column permutation to find out where my columns should go */ 1658 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1659 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1660 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1661 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1662 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1663 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1664 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1665 1666 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1667 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1668 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1669 1670 /* Find out where my gcols should go */ 1671 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1672 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1674 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1675 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1676 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1677 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1678 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1679 1680 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1681 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1682 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1683 for (i=0; i<m; i++) { 1684 PetscInt row = rdest[i],rowner; 1685 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1686 for (j=ai[i]; j<ai[i+1]; j++) { 1687 PetscInt cowner,col = cdest[aj[j]]; 1688 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1689 if (rowner == cowner) dnnz[i]++; 1690 else onnz[i]++; 1691 } 1692 for (j=bi[i]; j<bi[i+1]; j++) { 1693 PetscInt cowner,col = gcdest[bj[j]]; 1694 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1695 if (rowner == cowner) dnnz[i]++; 1696 else onnz[i]++; 1697 } 1698 } 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1701 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1702 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1703 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1704 1705 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1706 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1707 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1708 for (i=0; i<m; i++) { 1709 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1710 PetscInt j0,rowlen; 1711 rowlen = ai[i+1] - ai[i]; 1712 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1713 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1714 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1715 } 1716 rowlen = bi[i+1] - bi[i]; 1717 for (j0=j=0; j<rowlen; j0=j) { 1718 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1719 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1720 } 1721 } 1722 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1723 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1724 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1725 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1726 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1727 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1728 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1729 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1730 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1731 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1732 *B = Aperm; 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1737 { 1738 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1743 if (ghosts) *ghosts = aij->garray; 1744 PetscFunctionReturn(0); 1745 } 1746 1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1748 { 1749 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1750 Mat A = mat->A,B = mat->B; 1751 PetscErrorCode ierr; 1752 PetscReal isend[5],irecv[5]; 1753 1754 PetscFunctionBegin; 1755 info->block_size = 1.0; 1756 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1757 1758 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1759 isend[3] = info->memory; isend[4] = info->mallocs; 1760 1761 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1762 1763 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1764 isend[3] += info->memory; isend[4] += info->mallocs; 1765 if (flag == MAT_LOCAL) { 1766 info->nz_used = isend[0]; 1767 info->nz_allocated = isend[1]; 1768 info->nz_unneeded = isend[2]; 1769 info->memory = isend[3]; 1770 info->mallocs = 
isend[4]; 1771 } else if (flag == MAT_GLOBAL_MAX) { 1772 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1773 1774 info->nz_used = irecv[0]; 1775 info->nz_allocated = irecv[1]; 1776 info->nz_unneeded = irecv[2]; 1777 info->memory = irecv[3]; 1778 info->mallocs = irecv[4]; 1779 } else if (flag == MAT_GLOBAL_SUM) { 1780 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1781 1782 info->nz_used = irecv[0]; 1783 info->nz_allocated = irecv[1]; 1784 info->nz_unneeded = irecv[2]; 1785 info->memory = irecv[3]; 1786 info->mallocs = irecv[4]; 1787 } 1788 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1789 info->fill_ratio_needed = 0; 1790 info->factor_mallocs = 0; 1791 PetscFunctionReturn(0); 1792 } 1793 1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1795 { 1796 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 switch (op) { 1801 case MAT_NEW_NONZERO_LOCATIONS: 1802 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1803 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1804 case MAT_KEEP_NONZERO_PATTERN: 1805 case MAT_NEW_NONZERO_LOCATION_ERR: 1806 case MAT_USE_INODES: 1807 case MAT_IGNORE_ZERO_ENTRIES: 1808 MatCheckPreallocated(A,1); 1809 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1810 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1811 break; 1812 case MAT_ROW_ORIENTED: 1813 MatCheckPreallocated(A,1); 1814 a->roworiented = flg; 1815 1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1817 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_NEW_DIAGONALS: 1820 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1821 break; 1822 case MAT_IGNORE_OFF_PROC_ENTRIES: 1823 a->donotstash = flg; 1824 break; 1825 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1826 case MAT_SPD: 1827 case MAT_SYMMETRIC: 1828 case MAT_STRUCTURALLY_SYMMETRIC: 1829 case MAT_HERMITIAN: 1830 case MAT_SYMMETRY_ETERNAL: 1831 break; 1832 case MAT_SUBMAT_SINGLEIS: 1833 A->submat_singleis = flg; 1834 break; 1835 case MAT_STRUCTURE_ONLY: 1836 /* The option is handled directly by MatSetOption() */ 1837 break; 1838 default: 1839 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1840 } 1841 PetscFunctionReturn(0); 1842 } 1843 1844 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1845 { 1846 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1847 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1848 PetscErrorCode ierr; 1849 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1850 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1851 PetscInt *cmap,*idx_p; 1852 1853 PetscFunctionBegin; 1854 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1855 mat->getrowactive = PETSC_TRUE; 1856 1857 if (!mat->rowvalues && (idx || v)) { 1858 /* 1859 allocate enough space to hold information from the longest row. 
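       A row returned by MatGetRow() is assembled from both the diagonal block (A) and the
       off-diagonal block (B), so the scratch arrays must be able to hold up to
       max_i ( nnz(A row i) + nnz(B row i) ) entries; that maximum is computed below.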
1860 */ 1861 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1862 PetscInt max = 1,tmp; 1863 for (i=0; i<matin->rmap->n; i++) { 1864 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1865 if (max < tmp) max = tmp; 1866 } 1867 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1868 } 1869 1870 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1871 lrow = row - rstart; 1872 1873 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1874 if (!v) {pvA = 0; pvB = 0;} 1875 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1876 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1877 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1878 nztot = nzA + nzB; 1879 1880 cmap = mat->garray; 1881 if (v || idx) { 1882 if (nztot) { 1883 /* Sort by increasing column numbers, assuming A and B already sorted */ 1884 PetscInt imark = -1; 1885 if (v) { 1886 *v = v_p = mat->rowvalues; 1887 for (i=0; i<nzB; i++) { 1888 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1889 else break; 1890 } 1891 imark = i; 1892 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1893 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1894 } 1895 if (idx) { 1896 *idx = idx_p = mat->rowindices; 1897 if (imark > -1) { 1898 for (i=0; i<imark; i++) { 1899 idx_p[i] = cmap[cworkB[i]]; 1900 } 1901 } else { 1902 for (i=0; i<nzB; i++) { 1903 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1904 else break; 1905 } 1906 imark = i; 1907 } 1908 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1909 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1910 } 1911 } else { 1912 if (idx) *idx = 0; 1913 if (v) *v = 0; 1914 } 1915 } 1916 *nz = nztot; 1917 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1918 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1923 { 1924 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1925 1926 PetscFunctionBegin; 1927 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1928 aij->getrowactive = PETSC_FALSE; 1929 PetscFunctionReturn(0); 1930 } 1931 1932 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1933 { 1934 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1935 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1936 PetscErrorCode ierr; 1937 PetscInt i,j,cstart = mat->cmap->rstart; 1938 PetscReal sum = 0.0; 1939 MatScalar *v; 1940 1941 PetscFunctionBegin; 1942 if (aij->size == 1) { 1943 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1944 } else { 1945 if (type == NORM_FROBENIUS) { 1946 v = amat->a; 1947 for (i=0; i<amat->nz; i++) { 1948 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1949 } 1950 v = bmat->a; 1951 for (i=0; i<bmat->nz; i++) { 1952 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1953 } 1954 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1955 *norm = PetscSqrtReal(*norm); 1956 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1957 } else if (type == NORM_1) { /* max column norm */ 1958 PetscReal *tmp,*tmp2; 1959 PetscInt *jj,*garray = aij->garray; 1960 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1961 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1962 *norm = 0.0; 
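      /*
         ||A||_1 = max_j sum_i |a_ij|: each process accumulates the absolute values of its local
         entries into tmp[] indexed by global column (off-diagonal block columns are translated
         through garray[]), the per-column sums are combined with an all-reduce, and the largest
         column sum is taken. For example, a column receiving local contributions 1.5 and 2.0
         from two processes has the column sum 3.5.
      */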
1963 v = amat->a; jj = amat->j; 1964 for (j=0; j<amat->nz; j++) { 1965 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1966 } 1967 v = bmat->a; jj = bmat->j; 1968 for (j=0; j<bmat->nz; j++) { 1969 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1970 } 1971 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1972 for (j=0; j<mat->cmap->N; j++) { 1973 if (tmp2[j] > *norm) *norm = tmp2[j]; 1974 } 1975 ierr = PetscFree(tmp);CHKERRQ(ierr); 1976 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1977 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1978 } else if (type == NORM_INFINITY) { /* max row norm */ 1979 PetscReal ntemp = 0.0; 1980 for (j=0; j<aij->A->rmap->n; j++) { 1981 v = amat->a + amat->i[j]; 1982 sum = 0.0; 1983 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1984 sum += PetscAbsScalar(*v); v++; 1985 } 1986 v = bmat->a + bmat->i[j]; 1987 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1988 sum += PetscAbsScalar(*v); v++; 1989 } 1990 if (sum > ntemp) ntemp = sum; 1991 } 1992 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1993 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1994 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1995 } 1996 PetscFunctionReturn(0); 1997 } 1998 1999 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2000 { 2001 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2002 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2003 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 2004 PetscErrorCode ierr; 2005 Mat B,A_diag,*B_diag; 2006 MatScalar *array; 2007 2008 PetscFunctionBegin; 2009 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2010 ai = Aloc->i; aj = Aloc->j; 2011 bi = Bloc->i; bj = Bloc->j; 2012 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2013 PetscInt *d_nnz,*g_nnz,*o_nnz; 2014 PetscSFNode *oloc; 2015 PETSC_UNUSED PetscSF sf; 2016 2017 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2018 /* compute d_nnz for preallocation */ 2019 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2020 for (i=0; i<ai[ma]; i++) { 2021 d_nnz[aj[i]]++; 2022 } 2023 /* compute local off-diagonal contributions */ 2024 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2025 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2026 /* map those to global */ 2027 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2028 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2029 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2030 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2031 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2032 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2033 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2034 2035 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2036 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2037 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2038 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2039 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2040 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2041 } else { 2042 B = *matout; 2043 
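    /*
       Illustrative usage sketch (editor's addition, not part of the original routine): a typical
       sequence that reaches this MAT_REUSE_MATRIX branch; B must come from an earlier
       MAT_INITIAL_MATRIX transpose of a matrix with the same nonzero pattern as A.

           Mat At;
           ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
           ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);   (after changing the values of A)
           ierr = MatDestroy(&At);CHKERRQ(ierr);
    */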
ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2044 } 2045 2046 b = (Mat_MPIAIJ*)B->data; 2047 A_diag = a->A; 2048 B_diag = &b->A; 2049 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2050 A_diag_ncol = A_diag->cmap->N; 2051 B_diag_ilen = sub_B_diag->ilen; 2052 B_diag_i = sub_B_diag->i; 2053 2054 /* Set ilen for diagonal of B */ 2055 for (i=0; i<A_diag_ncol; i++) { 2056 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2057 } 2058 2059 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2060 very quickly (=without using MatSetValues), because all writes are local. */ 2061 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2062 2063 /* copy over the B part */ 2064 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2065 array = Bloc->a; 2066 row = A->rmap->rstart; 2067 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2068 cols_tmp = cols; 2069 for (i=0; i<mb; i++) { 2070 ncol = bi[i+1]-bi[i]; 2071 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2072 row++; 2073 array += ncol; cols_tmp += ncol; 2074 } 2075 ierr = PetscFree(cols);CHKERRQ(ierr); 2076 2077 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2080 *matout = B; 2081 } else { 2082 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2088 { 2089 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2090 Mat a = aij->A,b = aij->B; 2091 PetscErrorCode ierr; 2092 PetscInt s1,s2,s3; 2093 2094 PetscFunctionBegin; 2095 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2096 if (rr) { 2097 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2098 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2099 /* Overlap communication with computation. 
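       The scatter of rr into aij->lvec is started here; the left scaling of the off-diagonal
       block and the scaling of the diagonal block proceed while those messages are in flight,
       and the scatter is completed below before the off-diagonal block is right scaled.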
*/ 2100 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 } 2102 if (ll) { 2103 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2104 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2105 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2106 } 2107 /* scale the diagonal block */ 2108 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2109 2110 if (rr) { 2111 /* Do a scatter end and then right scale the off-diagonal block */ 2112 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2119 { 2120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2129 { 2130 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2131 Mat a,b,c,d; 2132 PetscBool flg; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 a = matA->A; b = matA->B; 2137 c = matB->A; d = matB->B; 2138 2139 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2140 if (flg) { 2141 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2142 } 2143 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2151 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2152 2153 PetscFunctionBegin; 2154 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2155 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2156 /* because of the column compression in the off-processor part of the matrix a->B, 2157 the number of columns in a->B and b->B may be different, hence we cannot call 2158 the MatCopy() directly on the two parts. If need be, we can provide a more 2159 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2160 then copying the submatrices */ 2161 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2162 } else { 2163 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2164 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2165 } 2166 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* 2180 Computes the number of nonzeros per row needed for preallocation when X and Y 2181 have different nonzero structure. 
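   The count for each row is the size of the union of the two sorted column index lists: the loop
   merges them with two moving pointers, using xltog[] and yltog[] to translate the local column
   indices of X and Y to global columns. For example, global columns {0,3,7} in X and {3,5} in Y
   give nnz = 4.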
2182 */ 2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2184 { 2185 PetscInt i,j,k,nzx,nzy; 2186 2187 PetscFunctionBegin; 2188 /* Set the number of nonzeros in the new matrix */ 2189 for (i=0; i<m; i++) { 2190 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2191 nzx = xi[i+1] - xi[i]; 2192 nzy = yi[i+1] - yi[i]; 2193 nnz[i] = 0; 2194 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2195 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2196 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2197 nnz[i]++; 2198 } 2199 for (; k<nzy; k++) nnz[i]++; 2200 } 2201 PetscFunctionReturn(0); 2202 } 2203 2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt m = Y->rmap->N; 2209 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2210 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2211 2212 PetscFunctionBegin; 2213 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2214 PetscFunctionReturn(0); 2215 } 2216 2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2218 { 2219 PetscErrorCode ierr; 2220 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2221 PetscBLASInt bnz,one=1; 2222 Mat_SeqAIJ *x,*y; 2223 2224 PetscFunctionBegin; 2225 if (str == SAME_NONZERO_PATTERN) { 2226 PetscScalar alpha = a; 2227 x = (Mat_SeqAIJ*)xx->A->data; 2228 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2229 y = (Mat_SeqAIJ*)yy->A->data; 2230 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2231 x = (Mat_SeqAIJ*)xx->B->data; 2232 y = (Mat_SeqAIJ*)yy->B->data; 2233 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2234 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2235 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2236 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2237 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2238 } else { 2239 Mat B; 2240 PetscInt *nnz_d,*nnz_o; 2241 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2242 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2243 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2244 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2245 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2246 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2247 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2248 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2249 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2250 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2251 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2252 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2253 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2254 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2255 } 2256 PetscFunctionReturn(0); 2257 } 2258 2259 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2260 2261 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2262 { 2263 #if defined(PETSC_USE_COMPLEX) 2264 PetscErrorCode ierr; 2265 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2266 2267 PetscFunctionBegin; 2268 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2269 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2270 #else 2271 PetscFunctionBegin; 2272 #endif 2273 PetscFunctionReturn(0); 2274 } 2275 2276 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2277 { 2278 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2279 PetscErrorCode ierr; 2280 2281 PetscFunctionBegin; 2282 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2283 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2284 PetscFunctionReturn(0); 2285 } 2286 2287 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2288 { 2289 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2290 PetscErrorCode ierr; 2291 2292 PetscFunctionBegin; 2293 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2294 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2295 PetscFunctionReturn(0); 2296 } 2297 2298 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2299 { 2300 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2301 PetscErrorCode ierr; 2302 PetscInt i,*idxb = 0; 2303 PetscScalar *va,*vb; 2304 Vec vtmp; 2305 2306 PetscFunctionBegin; 2307 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2308 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2309 if (idx) { 2310 for (i=0; i<A->rmap->n; i++) { 2311 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2312 } 2313 } 2314 2315 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2316 if (idx) { 2317 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2318 } 2319 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2320 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2321 2322 for (i=0; i<A->rmap->n; i++) { 2323 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2324 va[i] = vb[i]; 2325 if (idx) idx[i] = a->garray[idxb[i]]; 2326 } 2327 } 2328 2329 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2330 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2331 ierr = PetscFree(idxb);CHKERRQ(ierr); 2332 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2333 PetscFunctionReturn(0); 2334 } 2335 2336 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2337 { 2338 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2339 PetscErrorCode ierr; 2340 PetscInt i,*idxb = 0; 2341 PetscScalar *va,*vb; 2342 Vec vtmp; 2343 2344 PetscFunctionBegin; 2345 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2346 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2347 if (idx) { 2348 for (i=0; i<A->rmap->n; i++) { 2349 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2350 } 2351 } 2352 2353 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2354 if (idx) { 2355 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2356 } 2357 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2358 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2359 2360 for (i=0; i<A->rmap->n; i++) { 2361 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2362 va[i] = vb[i]; 2363 if (idx) idx[i] = a->garray[idxb[i]]; 2364 } 2365 } 2366 2367 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2368 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2369 ierr = PetscFree(idxb);CHKERRQ(ierr); 2370 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2371 PetscFunctionReturn(0); 2372 } 2373 2374 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2375 { 2376 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2377 PetscInt n = A->rmap->n; 2378 PetscInt cstart = A->cmap->rstart; 2379 PetscInt *cmap = mat->garray; 2380 PetscInt *diagIdx, *offdiagIdx; 2381 Vec diagV, offdiagV; 2382 PetscScalar *a, *diagA, *offdiagA; 2383 PetscInt r; 2384 PetscErrorCode ierr; 2385 2386 PetscFunctionBegin; 2387
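  /*
     The row minimum is assembled blockwise: MatGetRowMin() on the diagonal block and on the
     off-diagonal block each fill a sequential work vector together with local column indices;
     the two partial results are then merged row by row, and the selected column index is
     converted to a global index (offset by cstart for the diagonal block, translated through
     garray[] for the off-diagonal block).
  */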
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2388 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2389 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2390 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2391 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2392 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2393 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2394 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2395 for (r = 0; r < n; ++r) { 2396 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2397 a[r] = diagA[r]; 2398 idx[r] = cstart + diagIdx[r]; 2399 } else { 2400 a[r] = offdiagA[r]; 2401 idx[r] = cmap[offdiagIdx[r]]; 2402 } 2403 } 2404 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2405 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2406 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2407 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2408 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2409 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2410 PetscFunctionReturn(0); 2411 } 2412 2413 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2414 { 2415 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2416 PetscInt n = A->rmap->n; 2417 PetscInt cstart = A->cmap->rstart; 2418 PetscInt *cmap = mat->garray; 2419 PetscInt *diagIdx, *offdiagIdx; 2420 Vec diagV, offdiagV; 2421 PetscScalar *a, *diagA, *offdiagA; 2422 PetscInt r; 2423 PetscErrorCode ierr; 2424 2425 PetscFunctionBegin; 2426 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2427 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2428 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2429 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2430 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2431 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2432 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2433 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2434 for (r = 0; r < n; ++r) { 2435 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2436 a[r] = diagA[r]; 2437 idx[r] = cstart + diagIdx[r]; 2438 } else { 2439 a[r] = offdiagA[r]; 2440 idx[r] = cmap[offdiagIdx[r]]; 2441 } 2442 } 2443 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2444 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2445 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2446 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2447 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2448 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2453 { 2454 PetscErrorCode ierr; 2455 Mat *dummy; 2456 2457 PetscFunctionBegin; 2458 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2459 *newmat = *dummy; 2460 ierr = PetscFree(dummy);CHKERRQ(ierr); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2465 { 2466 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2467 PetscErrorCode ierr; 2468 2469 PetscFunctionBegin; 2470 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2471 A->factorerrortype = a->A->factorerrortype; 2472 PetscFunctionReturn(0); 2473 } 2474 2475 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2476 { 2477 PetscErrorCode ierr; 2478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2479 2480 PetscFunctionBegin; 2481
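  /*
     Illustrative usage sketch (editor's addition, not part of the original routine): filling an
     MPIAIJ matrix A with random values; the matrix must already have been preallocated (e.g. with
     MatMPIAIJSetPreallocation()) before MatSetRandom() is called.

         PetscRandom rctx;
         ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
         ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
         ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
  */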
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2482 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2483 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2484 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2489 { 2490 PetscFunctionBegin; 2491 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2492 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2493 PetscFunctionReturn(0); 2494 } 2495 2496 /*@ 2497 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2498 2499 Collective on Mat 2500 2501 Input Parameters: 2502 + A - the matrix 2503 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2504 2505 Level: advanced 2506 2507 @*/ 2508 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2509 { 2510 PetscErrorCode ierr; 2511 2512 PetscFunctionBegin; 2513 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2518 { 2519 PetscErrorCode ierr; 2520 PetscBool sc = PETSC_FALSE,flg; 2521 2522 PetscFunctionBegin; 2523 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2524 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2525 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2526 if (flg) { 2527 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2528 } 2529 ierr = PetscOptionsTail();CHKERRQ(ierr); 2530 PetscFunctionReturn(0); 2531 } 2532 2533 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2534 { 2535 PetscErrorCode ierr; 2536 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2537 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2538 2539 PetscFunctionBegin; 2540 if (!Y->preallocated) { 2541 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2542 } else if (!aij->nz) { 2543 PetscInt nonew = aij->nonew; 2544 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2545 aij->nonew = nonew; 2546 } 2547 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2548 PetscFunctionReturn(0); 2549 } 2550 2551 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2552 { 2553 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2554 PetscErrorCode ierr; 2555 2556 PetscFunctionBegin; 2557 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2558 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2559 if (d) { 2560 PetscInt rstart; 2561 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2562 *d += rstart; 2563 2564 } 2565 PetscFunctionReturn(0); 2566 } 2567 2568 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2569 { 2570 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2571 PetscErrorCode ierr; 2572 2573 PetscFunctionBegin; 2574 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2575 PetscFunctionReturn(0); 2576 } 2577 2578 /* -------------------------------------------------------------------*/ 2579 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2580 MatGetRow_MPIAIJ, 2581 
MatRestoreRow_MPIAIJ, 2582 MatMult_MPIAIJ, 2583 /* 4*/ MatMultAdd_MPIAIJ, 2584 MatMultTranspose_MPIAIJ, 2585 MatMultTransposeAdd_MPIAIJ, 2586 0, 2587 0, 2588 0, 2589 /*10*/ 0, 2590 0, 2591 0, 2592 MatSOR_MPIAIJ, 2593 MatTranspose_MPIAIJ, 2594 /*15*/ MatGetInfo_MPIAIJ, 2595 MatEqual_MPIAIJ, 2596 MatGetDiagonal_MPIAIJ, 2597 MatDiagonalScale_MPIAIJ, 2598 MatNorm_MPIAIJ, 2599 /*20*/ MatAssemblyBegin_MPIAIJ, 2600 MatAssemblyEnd_MPIAIJ, 2601 MatSetOption_MPIAIJ, 2602 MatZeroEntries_MPIAIJ, 2603 /*24*/ MatZeroRows_MPIAIJ, 2604 0, 2605 0, 2606 0, 2607 0, 2608 /*29*/ MatSetUp_MPIAIJ, 2609 0, 2610 0, 2611 MatGetDiagonalBlock_MPIAIJ, 2612 0, 2613 /*34*/ MatDuplicate_MPIAIJ, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*39*/ MatAXPY_MPIAIJ, 2619 MatCreateSubMatrices_MPIAIJ, 2620 MatIncreaseOverlap_MPIAIJ, 2621 MatGetValues_MPIAIJ, 2622 MatCopy_MPIAIJ, 2623 /*44*/ MatGetRowMax_MPIAIJ, 2624 MatScale_MPIAIJ, 2625 MatShift_MPIAIJ, 2626 MatDiagonalSet_MPIAIJ, 2627 MatZeroRowsColumns_MPIAIJ, 2628 /*49*/ MatSetRandom_MPIAIJ, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /*54*/ MatFDColoringCreate_MPIXAIJ, 2634 0, 2635 MatSetUnfactored_MPIAIJ, 2636 MatPermute_MPIAIJ, 2637 0, 2638 /*59*/ MatCreateSubMatrix_MPIAIJ, 2639 MatDestroy_MPIAIJ, 2640 MatView_MPIAIJ, 2641 0, 2642 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2643 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2644 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2649 MatGetRowMinAbs_MPIAIJ, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*75*/ MatFDColoringApply_AIJ, 2655 MatSetFromOptions_MPIAIJ, 2656 0, 2657 0, 2658 MatFindZeroDiagonals_MPIAIJ, 2659 /*80*/ 0, 2660 0, 2661 0, 2662 /*83*/ MatLoad_MPIAIJ, 2663 MatIsSymmetric_MPIAIJ, 2664 0, 2665 0, 2666 0, 2667 0, 2668 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2669 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2670 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2671 MatPtAP_MPIAIJ_MPIAIJ, 2672 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2673 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*99*/ 0, 2679 0, 2680 0, 2681 MatConjugate_MPIAIJ, 2682 0, 2683 /*104*/MatSetValuesRow_MPIAIJ, 2684 MatRealPart_MPIAIJ, 2685 MatImaginaryPart_MPIAIJ, 2686 0, 2687 0, 2688 /*109*/0, 2689 0, 2690 MatGetRowMin_MPIAIJ, 2691 0, 2692 MatMissingDiagonal_MPIAIJ, 2693 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2694 0, 2695 MatGetGhosts_MPIAIJ, 2696 0, 2697 0, 2698 /*119*/0, 2699 0, 2700 0, 2701 0, 2702 MatGetMultiProcBlock_MPIAIJ, 2703 /*124*/MatFindNonzeroRows_MPIAIJ, 2704 MatGetColumnNorms_MPIAIJ, 2705 MatInvertBlockDiagonal_MPIAIJ, 2706 MatInvertVariableBlockDiagonal_MPIAIJ, 2707 MatCreateSubMatricesMPI_MPIAIJ, 2708 /*129*/0, 2709 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2710 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2711 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2712 0, 2713 /*134*/0, 2714 0, 2715 MatRARt_MPIAIJ_MPIAIJ, 2716 0, 2717 0, 2718 /*139*/MatSetBlockSizes_MPIAIJ, 2719 0, 2720 0, 2721 MatFDColoringSetUp_MPIXAIJ, 2722 MatFindOffBlockDiagonalEntries_MPIAIJ, 2723 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2724 }; 2725 2726 /* ----------------------------------------------------------------------------------------*/ 2727 2728 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2729 { 2730 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2731 PetscErrorCode ierr; 2732 2733 PetscFunctionBegin; 2734 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2735 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2736 PetscFunctionReturn(0); 2737 } 2738 2739 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2740 { 2741 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2742 PetscErrorCode ierr; 2743 2744 PetscFunctionBegin; 2745 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2746 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2751 { 2752 Mat_MPIAIJ *b; 2753 PetscErrorCode ierr; 2754 2755 PetscFunctionBegin; 2756 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2757 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2758 b = (Mat_MPIAIJ*)B->data; 2759 2760 #if defined(PETSC_USE_CTABLE) 2761 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2762 #else 2763 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2764 #endif 2765 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2766 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2767 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2768 2769 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2770 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2771 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2772 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2773 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2774 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2775 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2776 2777 if (!B->preallocated) { 2778 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2779 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2780 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2781 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2782 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2783 } 2784 2785 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2786 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2787 B->preallocated = PETSC_TRUE; 2788 B->was_assembled = PETSC_FALSE; 2789 B->assembled = PETSC_FALSE;; 2790 PetscFunctionReturn(0); 2791 } 2792 2793 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2794 { 2795 Mat_MPIAIJ *b; 2796 PetscErrorCode ierr; 2797 2798 PetscFunctionBegin; 2799 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2800 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2801 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2802 b = (Mat_MPIAIJ*)B->data; 2803 2804 #if defined(PETSC_USE_CTABLE) 2805 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2806 #else 2807 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2808 #endif 2809 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2810 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2811 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2812 2813 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2814 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2815 B->preallocated = PETSC_TRUE; 2816 B->was_assembled = PETSC_FALSE; 2817 B->assembled = PETSC_FALSE; 2818 PetscFunctionReturn(0); 2819 } 2820 2821 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2822 { 2823 Mat mat; 2824 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2825 PetscErrorCode ierr; 2826 2827 PetscFunctionBegin; 2828 *newmat = 0; 2829 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2830 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2831 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2832 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 
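  /*
     Illustrative usage sketch (editor's addition, not part of the original routine): duplicating a
     parallel AIJ matrix from user code; MAT_DO_NOT_COPY_VALUES gives the same nonzero pattern with
     zeroed values, MAT_COPY_VALUES also copies the numerical values.

         Mat C;
         ierr = MatDuplicate(A,MAT_COPY_VALUES,&C);CHKERRQ(ierr);
         ierr = MatDestroy(&C);CHKERRQ(ierr);
  */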
2833 a = (Mat_MPIAIJ*)mat->data; 2834 2835 mat->factortype = matin->factortype; 2836 mat->assembled = PETSC_TRUE; 2837 mat->insertmode = NOT_SET_VALUES; 2838 mat->preallocated = PETSC_TRUE; 2839 2840 a->size = oldmat->size; 2841 a->rank = oldmat->rank; 2842 a->donotstash = oldmat->donotstash; 2843 a->roworiented = oldmat->roworiented; 2844 a->rowindices = 0; 2845 a->rowvalues = 0; 2846 a->getrowactive = PETSC_FALSE; 2847 2848 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2849 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2850 2851 if (oldmat->colmap) { 2852 #if defined(PETSC_USE_CTABLE) 2853 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2854 #else 2855 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2856 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2857 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2858 #endif 2859 } else a->colmap = 0; 2860 if (oldmat->garray) { 2861 PetscInt len; 2862 len = oldmat->B->cmap->n; 2863 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2864 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2865 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2866 } else a->garray = 0; 2867 2868 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2869 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2870 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2871 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2872 2873 if (oldmat->Mvctx_mpi1) { 2874 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2875 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2876 } 2877 2878 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2879 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2880 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2881 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2882 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2883 *newmat = mat; 2884 PetscFunctionReturn(0); 2885 } 2886 2887 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2888 { 2889 PetscBool isbinary, ishdf5; 2890 PetscErrorCode ierr; 2891 2892 PetscFunctionBegin; 2893 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2894 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2895 /* force binary viewer to load .info file if it has not yet done so */ 2896 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2897 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2898 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2899 if (isbinary) { 2900 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2901 } else if (ishdf5) { 2902 #if defined(PETSC_HAVE_HDF5) 2903 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2904 #else 2905 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2906 #endif 2907 } else { 2908 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2909 } 
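  /*
     Illustrative usage sketch (editor's addition, not part of the original routine): loading an
     MPIAIJ matrix from a PETSc binary file, which ends up in MatLoad_MPIAIJ_Binary() below; the
     file name "matrix.dat" is only a placeholder.

         Mat         A;
         PetscViewer fd;
         ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
         ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
         ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
         ierr = MatLoad(A,fd);CHKERRQ(ierr);
         ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);
  */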
2910 PetscFunctionReturn(0); 2911 } 2912 2913 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2914 { 2915 PetscScalar *vals,*svals; 2916 MPI_Comm comm; 2917 PetscErrorCode ierr; 2918 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2919 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2920 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2921 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2922 PetscInt cend,cstart,n,*rowners; 2923 int fd; 2924 PetscInt bs = newMat->rmap->bs; 2925 2926 PetscFunctionBegin; 2927 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2928 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2929 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2930 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2931 if (!rank) { 2932 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2933 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2934 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2935 } 2936 2937 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2938 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2939 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2940 if (bs < 0) bs = 1; 2941 2942 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2943 M = header[1]; N = header[2]; 2944 2945 /* If global sizes are set, check if they are consistent with that given in the file */ 2946 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2947 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2948 2949 /* determine ownership of all (block) rows */ 2950 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2951 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2952 else m = newMat->rmap->n; /* Set by user */ 2953 2954 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2955 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2956 2957 /* First process needs enough room for process with most rows */ 2958 if (!rank) { 2959 mmax = rowners[1]; 2960 for (i=2; i<=size; i++) { 2961 mmax = PetscMax(mmax, rowners[i]); 2962 } 2963 } else mmax = -1; /* unused, but compilers complain */ 2964 2965 rowners[0] = 0; 2966 for (i=2; i<=size; i++) { 2967 rowners[i] += rowners[i-1]; 2968 } 2969 rstart = rowners[rank]; 2970 rend = rowners[rank+1]; 2971 2972 /* distribute row lengths to all processors */ 2973 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2974 if (!rank) { 2975 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2976 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2977 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2978 for (j=0; j<m; j++) { 2979 procsnz[0] += ourlens[j]; 2980 } 2981 for (i=1; i<size; i++) { 2982 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2983 /* calculate the number of nonzeros on each processor */ 2984 
for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2985 procsnz[i] += rowlengths[j]; 2986 } 2987 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2988 } 2989 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2990 } else { 2991 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2992 } 2993 2994 if (!rank) { 2995 /* determine max buffer needed and allocate it */ 2996 maxnz = 0; 2997 for (i=0; i<size; i++) { 2998 maxnz = PetscMax(maxnz,procsnz[i]); 2999 } 3000 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3001 3002 /* read in my part of the matrix column indices */ 3003 nz = procsnz[0]; 3004 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3005 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3006 3007 /* read in every one elses and ship off */ 3008 for (i=1; i<size; i++) { 3009 nz = procsnz[i]; 3010 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3011 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3012 } 3013 ierr = PetscFree(cols);CHKERRQ(ierr); 3014 } else { 3015 /* determine buffer space needed for message */ 3016 nz = 0; 3017 for (i=0; i<m; i++) { 3018 nz += ourlens[i]; 3019 } 3020 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3021 3022 /* receive message of column indices*/ 3023 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3024 } 3025 3026 /* determine column ownership if matrix is not square */ 3027 if (N != M) { 3028 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3029 else n = newMat->cmap->n; 3030 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3031 cstart = cend - n; 3032 } else { 3033 cstart = rstart; 3034 cend = rend; 3035 n = cend - cstart; 3036 } 3037 3038 /* loop over local rows, determining number of off diagonal entries */ 3039 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3040 jj = 0; 3041 for (i=0; i<m; i++) { 3042 for (j=0; j<ourlens[i]; j++) { 3043 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3044 jj++; 3045 } 3046 } 3047 3048 for (i=0; i<m; i++) { 3049 ourlens[i] -= offlens[i]; 3050 } 3051 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3052 3053 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3054 3055 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3056 3057 for (i=0; i<m; i++) { 3058 ourlens[i] += offlens[i]; 3059 } 3060 3061 if (!rank) { 3062 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3063 3064 /* read in my part of the matrix numerical values */ 3065 nz = procsnz[0]; 3066 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3067 3068 /* insert into matrix */ 3069 jj = rstart; 3070 smycols = mycols; 3071 svals = vals; 3072 for (i=0; i<m; i++) { 3073 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3074 smycols += ourlens[i]; 3075 svals += ourlens[i]; 3076 jj++; 3077 } 3078 3079 /* read in other processors and ship out */ 3080 for (i=1; i<size; i++) { 3081 nz = procsnz[i]; 3082 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3083 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3084 } 3085 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3086 } else { 3087 /* receive numeric values */ 3088 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3089 3090 /* receive message of values*/ 3091 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3092 3093 /* insert into matrix */ 3094 jj = rstart; 3095 smycols = 
mycols; 3096 svals = vals; 3097 for (i=0; i<m; i++) { 3098 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3099 smycols += ourlens[i]; 3100 svals += ourlens[i]; 3101 jj++; 3102 } 3103 } 3104 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3105 ierr = PetscFree(vals);CHKERRQ(ierr); 3106 ierr = PetscFree(mycols);CHKERRQ(ierr); 3107 ierr = PetscFree(rowners);CHKERRQ(ierr); 3108 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3110 PetscFunctionReturn(0); 3111 } 3112 3113 /* Not scalable because of ISAllGather() unless getting all columns. */ 3114 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3115 { 3116 PetscErrorCode ierr; 3117 IS iscol_local; 3118 PetscBool isstride; 3119 PetscMPIInt lisstride=0,gisstride; 3120 3121 PetscFunctionBegin; 3122 /* check if we are grabbing all columns*/ 3123 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3124 3125 if (isstride) { 3126 PetscInt start,len,mstart,mlen; 3127 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3128 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3129 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3130 if (mstart == start && mlen-mstart == len) lisstride = 1; 3131 } 3132 3133 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3134 if (gisstride) { 3135 PetscInt N; 3136 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3137 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3138 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3139 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3140 } else { 3141 PetscInt cbs; 3142 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3143 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3144 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3145 } 3146 3147 *isseq = iscol_local; 3148 PetscFunctionReturn(0); 3149 } 3150 3151 /* 3152 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3153 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3154 3155 Input Parameters: 3156 mat - matrix 3157 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3158 i.e., mat->rstart <= isrow[i] < mat->rend 3159 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3160 i.e., mat->cstart <= iscol[i] < mat->cend 3161 Output Parameter: 3162 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3163 iscol_o - sequential column index set for retrieving mat->B 3164 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3165 */ 3166 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3167 { 3168 PetscErrorCode ierr; 3169 Vec x,cmap; 3170 const PetscInt *is_idx; 3171 PetscScalar *xarray,*cmaparray; 3172 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3173 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3174 Mat B=a->B; 3175 Vec lvec=a->lvec,lcmap; 3176 PetscInt i,cstart,cend,Bn=B->cmap->N; 3177 MPI_Comm comm; 3178 VecScatter Mvctx=a->Mvctx; 3179 3180 PetscFunctionBegin; 3181 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3182 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3183 3184 /* (1) iscol 
is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3185 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3186 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3187 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3188 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3189 3190 /* Get start indices */ 3191 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3192 isstart -= ncols; 3193 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3194 3195 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3196 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3197 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3198 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3199 for (i=0; i<ncols; i++) { 3200 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3201 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3202 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3203 } 3204 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3205 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3206 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3207 3208 /* Get iscol_d */ 3209 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3210 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3211 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3212 3213 /* Get isrow_d */ 3214 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3215 rstart = mat->rmap->rstart; 3216 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3217 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3218 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3219 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3220 3221 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3222 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3223 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3224 3225 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3226 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3227 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3228 3229 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3230 3231 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3232 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3233 3234 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3235 /* off-process column indices */ 3236 count = 0; 3237 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3238 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3239 3240 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3241 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3242 for (i=0; i<Bn; i++) { 3243 if (PetscRealPart(xarray[i]) > -1.0) { 3244 idx[count] = i; /* local column index in off-diagonal part B */ 3245 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3246 count++; 3247 } 3248 } 3249 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3250 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3251 3252 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3253 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3254 3255 ierr = PetscFree(idx);CHKERRQ(ierr); 3256 *garray = cmap1; 3257 3258 ierr = VecDestroy(&x);CHKERRQ(ierr); 3259 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3260 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3261 PetscFunctionReturn(0); 3262 } 3263 3264 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3265 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3266 { 3267 PetscErrorCode ierr; 3268 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3269 Mat M = NULL; 3270 MPI_Comm comm; 3271 IS iscol_d,isrow_d,iscol_o; 3272 Mat Asub = NULL,Bsub = NULL; 3273 PetscInt n; 3274 3275 PetscFunctionBegin; 3276 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3277 3278 if (call == MAT_REUSE_MATRIX) { 3279 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3280 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3281 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3282 3283 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3284 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3285 3286 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3287 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3288 3289 /* Update diagonal and off-diagonal portions of submat */ 3290 asub = (Mat_MPIAIJ*)(*submat)->data; 3291 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3292 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3293 if (n) { 3294 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3295 } 3296 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3297 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3298 3299 } else { /* call == MAT_INITIAL_MATRIX) */ 3300 const PetscInt *garray; 3301 PetscInt BsubN; 3302 3303 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3304 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3305 3306 /* Create local submatrices Asub and Bsub */ 3307 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3308 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3309 3310 /* Create submatrix M */ 3311 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3312 3313 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3314 asub = (Mat_MPIAIJ*)M->data; 3315 3316 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3317 n = asub->B->cmap->N; 3318 if (BsubN > n) { 3319 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3320 const PetscInt *idx; 3321 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3322 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3323 3324 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3325 j = 0; 3326 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3327 for (i=0; i<n; i++) { 3328 if (j >= BsubN) break; 3329 while (subgarray[i] > garray[j]) j++; 3330 3331 if (subgarray[i] == garray[j]) { 3332 idx_new[i] = idx[j++]; 3333 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3334 } 3335 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3336 3337 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3338 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3339 3340 } else if (BsubN < n) { 3341 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3342 } 3343 3344 ierr = PetscFree(garray);CHKERRQ(ierr); 3345 *submat = M; 3346 3347 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3348 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3349 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3350 3351 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3352 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3353 3354 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3355 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3356 } 3357 PetscFunctionReturn(0); 3358 } 3359 3360 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3361 { 3362 PetscErrorCode ierr; 3363 IS iscol_local=NULL,isrow_d; 3364 PetscInt csize; 3365 PetscInt n,i,j,start,end; 3366 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3367 MPI_Comm comm; 3368 3369 PetscFunctionBegin; 3370 /* If isrow has same processor distribution as mat, 3371 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3372 if (call == MAT_REUSE_MATRIX) { 3373 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3374 if (isrow_d) { 3375 sameRowDist = PETSC_TRUE; 3376 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3377 } else { 3378 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3379 if (iscol_local) { 3380 sameRowDist = PETSC_TRUE; 3381 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3382 } 3383 } 3384 } else { 3385 /* Check if isrow has same processor distribution as mat */ 3386 sameDist[0] 
= PETSC_FALSE; 3387 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3388 if (!n) { 3389 sameDist[0] = PETSC_TRUE; 3390 } else { 3391 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3392 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3393 if (i >= start && j < end) { 3394 sameDist[0] = PETSC_TRUE; 3395 } 3396 } 3397 3398 /* Check if iscol has same processor distribution as mat */ 3399 sameDist[1] = PETSC_FALSE; 3400 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3401 if (!n) { 3402 sameDist[1] = PETSC_TRUE; 3403 } else { 3404 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3405 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3406 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3407 } 3408 3409 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3410 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3411 sameRowDist = tsameDist[0]; 3412 } 3413 3414 if (sameRowDist) { 3415 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3416 /* isrow and iscol have same processor distribution as mat */ 3417 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3418 PetscFunctionReturn(0); 3419 } else { /* sameRowDist */ 3420 /* isrow has same processor distribution as mat */ 3421 if (call == MAT_INITIAL_MATRIX) { 3422 PetscBool sorted; 3423 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3424 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3425 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3426 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3427 3428 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3429 if (sorted) { 3430 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3431 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3432 PetscFunctionReturn(0); 3433 } 3434 } else { /* call == MAT_REUSE_MATRIX */ 3435 IS iscol_sub; 3436 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3437 if (iscol_sub) { 3438 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3439 PetscFunctionReturn(0); 3440 } 3441 } 3442 } 3443 } 3444 3445 /* General case: iscol -> iscol_local which has global size of iscol */ 3446 if (call == MAT_REUSE_MATRIX) { 3447 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3448 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3449 } else { 3450 if (!iscol_local) { 3451 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3452 } 3453 } 3454 3455 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3456 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3457 3458 if (call == MAT_INITIAL_MATRIX) { 3459 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3460 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3461 } 3462 PetscFunctionReturn(0); 3463 } 3464 3465 /*@C 3466 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3467 and "off-diagonal" part of the matrix in CSR format. 3468 3469 Collective on MPI_Comm 3470 3471 Input Parameters: 3472 + comm - MPI communicator 3473 . 
A - "diagonal" portion of matrix 3474 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3475 - garray - global index of B columns 3476 3477 Output Parameter: 3478 . mat - the matrix, with input A as its local diagonal matrix 3479 Level: advanced 3480 3481 Notes: 3482 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3483 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3484 3485 .seealso: MatCreateMPIAIJWithSplitArrays() 3486 @*/ 3487 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3488 { 3489 PetscErrorCode ierr; 3490 Mat_MPIAIJ *maij; 3491 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3492 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3493 PetscScalar *oa=b->a; 3494 Mat Bnew; 3495 PetscInt m,n,N; 3496 3497 PetscFunctionBegin; 3498 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3499 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3500 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3501 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3502 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3503 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3504 3505 /* Get global columns of mat */ 3506 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3507 3508 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3509 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3510 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3511 maij = (Mat_MPIAIJ*)(*mat)->data; 3512 3513 (*mat)->preallocated = PETSC_TRUE; 3514 3515 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3516 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3517 3518 /* Set A as diagonal portion of *mat */ 3519 maij->A = A; 3520 3521 nz = oi[m]; 3522 for (i=0; i<nz; i++) { 3523 col = oj[i]; 3524 oj[i] = garray[col]; 3525 } 3526 3527 /* Set Bnew as off-diagonal portion of *mat */ 3528 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3529 bnew = (Mat_SeqAIJ*)Bnew->data; 3530 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3531 maij->B = Bnew; 3532 3533 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3534 3535 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3536 b->free_a = PETSC_FALSE; 3537 b->free_ij = PETSC_FALSE; 3538 ierr = MatDestroy(&B);CHKERRQ(ierr); 3539 3540 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3541 bnew->free_a = PETSC_TRUE; 3542 bnew->free_ij = PETSC_TRUE; 3543 3544 /* condense columns of maij->B */ 3545 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3546 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3547 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3548 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3549 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3550 PetscFunctionReturn(0); 3551 } 3552 3553 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3554 
3555 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3556 { 3557 PetscErrorCode ierr; 3558 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3559 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3560 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3561 Mat M,Msub,B=a->B; 3562 MatScalar *aa; 3563 Mat_SeqAIJ *aij; 3564 PetscInt *garray = a->garray,*colsub,Ncols; 3565 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3566 IS iscol_sub,iscmap; 3567 const PetscInt *is_idx,*cmap; 3568 PetscBool allcolumns=PETSC_FALSE; 3569 MPI_Comm comm; 3570 3571 PetscFunctionBegin; 3572 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3573 3574 if (call == MAT_REUSE_MATRIX) { 3575 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3576 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3577 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3578 3579 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3580 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3581 3582 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3583 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3584 3585 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3586 3587 } else { /* call == MAT_INITIAL_MATRIX) */ 3588 PetscBool flg; 3589 3590 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3591 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3592 3593 /* (1) iscol -> nonscalable iscol_local */ 3594 /* Check for special case: each processor gets entire matrix columns */ 3595 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3596 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3597 if (allcolumns) { 3598 iscol_sub = iscol_local; 3599 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3600 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3601 3602 } else { 3603 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3604 PetscInt *idx,*cmap1,k; 3605 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3606 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3607 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3608 count = 0; 3609 k = 0; 3610 for (i=0; i<Ncols; i++) { 3611 j = is_idx[i]; 3612 if (j >= cstart && j < cend) { 3613 /* diagonal part of mat */ 3614 idx[count] = j; 3615 cmap1[count++] = i; /* column index in submat */ 3616 } else if (Bn) { 3617 /* off-diagonal part of mat */ 3618 if (j == garray[k]) { 3619 idx[count] = j; 3620 cmap1[count++] = i; /* column index in submat */ 3621 } else if (j > garray[k]) { 3622 while (j > garray[k] && k < Bn-1) k++; 3623 if (j == garray[k]) { 3624 idx[count] = j; 3625 cmap1[count++] = i; /* column index in submat */ 3626 } 3627 } 3628 } 3629 } 3630 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3631 3632 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3633 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3634 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3635 3636 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3637 } 3638 3639 /* (3) Create sequential Msub */ 3640 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3641 } 3642 3643 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3644 aij = (Mat_SeqAIJ*)(Msub)->data; 3645 ii = aij->i; 3646 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3647 3648 /* 3649 m - number of local rows 3650 Ncols - number of columns (same on all processors) 3651 rstart - first row in new global matrix generated 3652 */ 3653 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3654 3655 if (call == MAT_INITIAL_MATRIX) { 3656 /* (4) Create parallel newmat */ 3657 PetscMPIInt rank,size; 3658 PetscInt csize; 3659 3660 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3661 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3662 3663 /* 3664 Determine the number of non-zeros in the diagonal and off-diagonal 3665 portions of the matrix in order to do correct preallocation 3666 */ 3667 3668 /* first get start and end of "diagonal" columns */ 3669 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3670 if (csize == PETSC_DECIDE) { 3671 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3672 if (mglobal == Ncols) { /* square matrix */ 3673 nlocal = m; 3674 } else { 3675 nlocal = Ncols/size + ((Ncols % size) > rank); 3676 } 3677 } else { 3678 nlocal = csize; 3679 } 3680 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3681 rstart = rend - nlocal; 3682 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3683 3684 /* next, compute all the lengths */ 3685 jj = aij->j; 3686 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3687 olens = dlens + m; 3688 for (i=0; i<m; i++) { 3689 jend = ii[i+1] - ii[i]; 3690 olen = 0; 3691 dlen = 0; 3692 for (j=0; j<jend; j++) { 3693 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3694 else dlen++; 3695 jj++; 3696 } 3697 olens[i] = olen; 3698 dlens[i] = dlen; 3699 } 3700 3701 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3702 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3703 3704 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3705 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
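    /* Inherit the block sizes and type of the original matrix, then preallocate the new parallel
       matrix with the diagonal/off-diagonal row lengths (dlens/olens) computed above */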
3706 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3707 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3708 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3709 ierr = PetscFree(dlens);CHKERRQ(ierr); 3710 3711 } else { /* call == MAT_REUSE_MATRIX */ 3712 M = *newmat; 3713 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3714 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3715 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3716 /* 3717 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3718 rather than the slower MatSetValues(). 3719 */ 3720 M->was_assembled = PETSC_TRUE; 3721 M->assembled = PETSC_FALSE; 3722 } 3723 3724 /* (5) Set values of Msub to *newmat */ 3725 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3726 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3727 3728 jj = aij->j; 3729 aa = aij->a; 3730 for (i=0; i<m; i++) { 3731 row = rstart + i; 3732 nz = ii[i+1] - ii[i]; 3733 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3734 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3735 jj += nz; aa += nz; 3736 } 3737 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3738 3739 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3740 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3741 3742 ierr = PetscFree(colsub);CHKERRQ(ierr); 3743 3744 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3745 if (call == MAT_INITIAL_MATRIX) { 3746 *newmat = M; 3747 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3748 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3749 3750 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3751 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3752 3753 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3754 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3755 3756 if (iscol_local) { 3757 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3758 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3759 } 3760 } 3761 PetscFunctionReturn(0); 3762 } 3763 3764 /* 3765 Not great since it makes two copies of the submatrix, first an SeqAIJ 3766 in local and then by concatenating the local matrices the end result. 3767 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3768 3769 Note: This requires a sequential iscol with all indices. 
3770 */ 3771 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3772 { 3773 PetscErrorCode ierr; 3774 PetscMPIInt rank,size; 3775 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3776 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3777 Mat M,Mreuse; 3778 MatScalar *aa,*vwork; 3779 MPI_Comm comm; 3780 Mat_SeqAIJ *aij; 3781 PetscBool colflag,allcolumns=PETSC_FALSE; 3782 3783 PetscFunctionBegin; 3784 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3785 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3786 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3787 3788 /* Check for special case: each processor gets entire matrix columns */ 3789 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3790 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3791 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3792 3793 if (call == MAT_REUSE_MATRIX) { 3794 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3795 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3796 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3797 } else { 3798 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3799 } 3800 3801 /* 3802 m - number of local rows 3803 n - number of columns (same on all processors) 3804 rstart - first row in new global matrix generated 3805 */ 3806 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3807 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3808 if (call == MAT_INITIAL_MATRIX) { 3809 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3810 ii = aij->i; 3811 jj = aij->j; 3812 3813 /* 3814 Determine the number of non-zeros in the diagonal and off-diagonal 3815 portions of the matrix in order to do correct preallocation 3816 */ 3817 3818 /* first get start and end of "diagonal" columns */ 3819 if (csize == PETSC_DECIDE) { 3820 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3821 if (mglobal == n) { /* square matrix */ 3822 nlocal = m; 3823 } else { 3824 nlocal = n/size + ((n % size) > rank); 3825 } 3826 } else { 3827 nlocal = csize; 3828 } 3829 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3830 rstart = rend - nlocal; 3831 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3832 3833 /* next, compute all the lengths */ 3834 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3835 olens = dlens + m; 3836 for (i=0; i<m; i++) { 3837 jend = ii[i+1] - ii[i]; 3838 olen = 0; 3839 dlen = 0; 3840 for (j=0; j<jend; j++) { 3841 if (*jj < rstart || *jj >= rend) olen++; 3842 else dlen++; 3843 jj++; 3844 } 3845 olens[i] = olen; 3846 dlens[i] = dlen; 3847 } 3848 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3849 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3850 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3851 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3852 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3853 ierr = PetscFree(dlens);CHKERRQ(ierr); 3854 } else { 3855 PetscInt ml,nl; 3856 3857 M = *newmat; 3858 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3859 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3860 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3861 /* 3862 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3863 rather than the slower MatSetValues(). 3864 */ 3865 M->was_assembled = PETSC_TRUE; 3866 M->assembled = PETSC_FALSE; 3867 } 3868 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3869 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3870 ii = aij->i; 3871 jj = aij->j; 3872 aa = aij->a; 3873 for (i=0; i<m; i++) { 3874 row = rstart + i; 3875 nz = ii[i+1] - ii[i]; 3876 cwork = jj; jj += nz; 3877 vwork = aa; aa += nz; 3878 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3879 } 3880 3881 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3882 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3883 *newmat = M; 3884 3885 /* save submatrix used in processor for next request */ 3886 if (call == MAT_INITIAL_MATRIX) { 3887 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3888 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3889 } 3890 PetscFunctionReturn(0); 3891 } 3892 3893 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3894 { 3895 PetscInt m,cstart, cend,j,nnz,i,d; 3896 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3897 const PetscInt *JJ; 3898 PetscScalar *values; 3899 PetscErrorCode ierr; 3900 PetscBool nooffprocentries; 3901 3902 PetscFunctionBegin; 3903 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3904 3905 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3906 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3907 m = B->rmap->n; 3908 cstart = B->cmap->rstart; 3909 cend = B->cmap->rend; 3910 rstart = B->rmap->rstart; 3911 3912 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3913 3914 #if defined(PETSC_USE_DEBUG) 3915 for (i=0; i<m && Ii; i++) { 3916 nnz = Ii[i+1]- Ii[i]; 3917 JJ = J + Ii[i]; 3918 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3919 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3920 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3921 } 3922 #endif 3923 3924 for (i=0; i<m && Ii; i++) { 3925 nnz = Ii[i+1]- Ii[i]; 3926 JJ = J + Ii[i]; 3927 nnz_max = PetscMax(nnz_max,nnz); 3928 d = 0; 3929 for (j=0; j<nnz; j++) { 3930 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3931 } 3932 d_nnz[i] = d; 3933 o_nnz[i] = nnz - d; 3934 } 3935 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3936 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3937 3938 if (v) values = (PetscScalar*)v; 3939 else { 3940 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3941 } 3942 3943 for (i=0; i<m && Ii; i++) { 3944 ii = i + rstart; 3945 nnz = Ii[i+1]- Ii[i]; 3946 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3947 } 3948 nooffprocentries = B->nooffprocentries; 3949 B->nooffprocentries = PETSC_TRUE; 3950 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3951 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3952 B->nooffprocentries = nooffprocentries; 3953 3954 if (!v) { 3955 ierr = PetscFree(values);CHKERRQ(ierr); 3956 } 3957 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3958 PetscFunctionReturn(0); 3959 } 3960 3961 /*@ 3962 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3963 (the default parallel PETSc format). 3964 3965 Collective on MPI_Comm 3966 3967 Input Parameters: 3968 + B - the matrix 3969 . i - the indices into j for the start of each local row (starts with zero) 3970 . j - the column indices for each local row (starts with zero) 3971 - v - optional values in the matrix 3972 3973 Level: developer 3974 3975 Notes: 3976 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3977 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3978 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3979 3980 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3981 3982 The format which is used for the sparse matrix input, is equivalent to a 3983 row-major ordering.. i.e for the following matrix, the input data expected is 3984 as shown 3985 3986 $ 1 0 0 3987 $ 2 0 3 P0 3988 $ ------- 3989 $ 4 5 6 P1 3990 $ 3991 $ Process0 [P0]: rows_owned=[0,1] 3992 $ i = {0,1,3} [size = nrow+1 = 2+1] 3993 $ j = {0,0,2} [size = 3] 3994 $ v = {1,2,3} [size = 3] 3995 $ 3996 $ Process1 [P1]: rows_owned=[2] 3997 $ i = {0,3} [size = nrow+1 = 1+1] 3998 $ j = {0,1,2} [size = 3] 3999 $ v = {4,5,6} [size = 3] 4000 4001 .keywords: matrix, aij, compressed row, sparse, parallel 4002 4003 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4004 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4005 @*/ 4006 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4007 { 4008 PetscErrorCode ierr; 4009 4010 PetscFunctionBegin; 4011 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4012 PetscFunctionReturn(0); 4013 } 4014 4015 /*@C 4016 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4017 (the default parallel PETSc format). For good matrix assembly performance 4018 the user should preallocate the matrix storage by setting the parameters 4019 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4020 performance can be increased by more than a factor of 50. 4021 4022 Collective on MPI_Comm 4023 4024 Input Parameters: 4025 + B - the matrix 4026 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4027 (same value is used for all local rows) 4028 . d_nnz - array containing the number of nonzeros in the various rows of the 4029 DIAGONAL portion of the local submatrix (possibly different for each row) 4030 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4031 The size of this array is equal to the number of local rows, i.e 'm'. 
         For matrices that will be factored, you must leave room for (and set)
         the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x N) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.
4186 4187 The format which is used for the sparse matrix input, is equivalent to a 4188 row-major ordering.. i.e for the following matrix, the input data expected is 4189 as shown 4190 4191 $ 1 0 0 4192 $ 2 0 3 P0 4193 $ ------- 4194 $ 4 5 6 P1 4195 $ 4196 $ Process0 [P0]: rows_owned=[0,1] 4197 $ i = {0,1,3} [size = nrow+1 = 2+1] 4198 $ j = {0,0,2} [size = 3] 4199 $ v = {1,2,3} [size = 3] 4200 $ 4201 $ Process1 [P1]: rows_owned=[2] 4202 $ i = {0,3} [size = nrow+1 = 1+1] 4203 $ j = {0,1,2} [size = 3] 4204 $ v = {4,5,6} [size = 3] 4205 4206 .keywords: matrix, aij, compressed row, sparse, parallel 4207 4208 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4209 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4210 @*/ 4211 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4212 { 4213 PetscErrorCode ierr; 4214 4215 PetscFunctionBegin; 4216 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4217 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4218 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4219 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4220 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4221 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4222 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4223 PetscFunctionReturn(0); 4224 } 4225 4226 /*@C 4227 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4228 (the default parallel PETSc format). For good matrix assembly performance 4229 the user should preallocate the matrix storage by setting the parameters 4230 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4231 performance can be increased by more than a factor of 50. 4232 4233 Collective on MPI_Comm 4234 4235 Input Parameters: 4236 + comm - MPI communicator 4237 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4238 This value should be the same as the local size used in creating the 4239 y vector for the matrix-vector product y = Ax. 4240 . n - This value should be the same as the local size used in creating the 4241 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4242 calculated if N is given) For square matrices n is almost always m. 4243 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4244 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4245 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4246 (same value is used for all local rows) 4247 . d_nnz - array containing the number of nonzeros in the various rows of the 4248 DIAGONAL portion of the local submatrix (possibly different for each row) 4249 or NULL, if d_nz is used to specify the nonzero structure. 4250 The size of this array is equal to the number of local rows, i.e 'm'. 4251 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4252 submatrix (same value is used for all local rows). 4253 - o_nnz - array containing the number of nonzeros in the various rows of the 4254 OFF-DIAGONAL portion of the local submatrix (possibly different for 4255 each row) or NULL, if o_nz is used to specify the nonzero 4256 structure. 
The size of this array is equal to the number 4257 of local rows, i.e 'm'. 4258 4259 Output Parameter: 4260 . A - the matrix 4261 4262 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4263 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4264 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4265 4266 Notes: 4267 If the *_nnz parameter is given then the *_nz parameter is ignored 4268 4269 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4270 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4271 storage requirements for this matrix. 4272 4273 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4274 processor than it must be used on all processors that share the object for 4275 that argument. 4276 4277 The user MUST specify either the local or global matrix dimensions 4278 (possibly both). 4279 4280 The parallel matrix is partitioned across processors such that the 4281 first m0 rows belong to process 0, the next m1 rows belong to 4282 process 1, the next m2 rows belong to process 2 etc.. where 4283 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4284 values corresponding to [m x N] submatrix. 4285 4286 The columns are logically partitioned with the n0 columns belonging 4287 to 0th partition, the next n1 columns belonging to the next 4288 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4289 4290 The DIAGONAL portion of the local submatrix on any given processor 4291 is the submatrix corresponding to the rows and columns m,n 4292 corresponding to the given processor. i.e diagonal matrix on 4293 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4294 etc. The remaining portion of the local submatrix [m x (N-n)] 4295 constitute the OFF-DIAGONAL portion. The example below better 4296 illustrates this concept. 4297 4298 For a square global matrix we define each processor's diagonal portion 4299 to be its local rows and the corresponding columns (a square submatrix); 4300 each processor's off-diagonal portion encompasses the remainder of the 4301 local matrix (a rectangular submatrix). 4302 4303 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4304 4305 When calling this routine with a single process communicator, a matrix of 4306 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4307 type of communicator, use the construction mechanism 4308 .vb 4309 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4310 .ve 4311 4312 $ MatCreate(...,&A); 4313 $ MatSetType(A,MATMPIAIJ); 4314 $ MatSetSizes(A, m,n,M,N); 4315 $ MatMPIAIJSetPreallocation(A,...); 4316 4317 By default, this format uses inodes (identical nodes) when possible. 4318 We search for consecutive rows with the same nonzero structure, thereby 4319 reusing matrix information to achieve increased efficiency. 4320 4321 Options Database Keys: 4322 + -mat_no_inode - Do not use inodes 4323 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4324 4325 4326 4327 Example usage: 4328 4329 Consider the following 8x8 matrix with 34 non-zero values, that is 4330 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4331 proc1 owns 3 rows, proc2 owns 2 rows. 
   This division can be shown as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence preallocation is perfect.
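   A minimal usage sketch for the example above (error checking omitted; the
   per-row column indices and values are placeholders that the application
   would supply):

.vb
     Mat         A;
     PetscInt    i,rstart,rend,ncols,cols[8];
     PetscScalar vals[8];

     /* the per-rank maxima from the table above are covered by d_nz=3, o_nz=4 */
     MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,NULL,4,NULL,&A);
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       /* fill ncols, cols[], vals[] for global row i from the application's data */
       MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve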
4395 4396 Level: intermediate 4397 4398 .keywords: matrix, aij, compressed row, sparse, parallel 4399 4400 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4401 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4402 @*/ 4403 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4404 { 4405 PetscErrorCode ierr; 4406 PetscMPIInt size; 4407 4408 PetscFunctionBegin; 4409 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4410 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4411 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4412 if (size > 1) { 4413 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4414 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4415 } else { 4416 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4417 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4418 } 4419 PetscFunctionReturn(0); 4420 } 4421 4422 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4423 { 4424 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4425 PetscBool flg; 4426 PetscErrorCode ierr; 4427 4428 PetscFunctionBegin; 4429 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4430 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4431 if (Ad) *Ad = a->A; 4432 if (Ao) *Ao = a->B; 4433 if (colmap) *colmap = a->garray; 4434 PetscFunctionReturn(0); 4435 } 4436 4437 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4438 { 4439 PetscErrorCode ierr; 4440 PetscInt m,N,i,rstart,nnz,Ii; 4441 PetscInt *indx; 4442 PetscScalar *values; 4443 4444 PetscFunctionBegin; 4445 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4446 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4447 PetscInt *dnz,*onz,sum,bs,cbs; 4448 4449 if (n == PETSC_DECIDE) { 4450 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4451 } 4452 /* Check sum(n) = N */ 4453 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4454 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4455 4456 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4457 rstart -= m; 4458 4459 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4460 for (i=0; i<m; i++) { 4461 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4462 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4463 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4464 } 4465 4466 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4467 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4468 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4469 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4470 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4471 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4472 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4473 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4474 } 4475 4476 /* numeric phase */ 4477 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4478 for (i=0; i<m; i++) { 4479 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4480 Ii = i + rstart; 4481 ierr = 
MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4482 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4483 } 4484 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4485 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4486 PetscFunctionReturn(0); 4487 } 4488 4489 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4490 { 4491 PetscErrorCode ierr; 4492 PetscMPIInt rank; 4493 PetscInt m,N,i,rstart,nnz; 4494 size_t len; 4495 const PetscInt *indx; 4496 PetscViewer out; 4497 char *name; 4498 Mat B; 4499 const PetscScalar *values; 4500 4501 PetscFunctionBegin; 4502 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4503 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4504 /* Should this be the type of the diagonal block of A? */ 4505 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4506 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4507 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4508 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4509 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4510 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4511 for (i=0; i<m; i++) { 4512 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4513 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4514 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4515 } 4516 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4517 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4518 4519 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4520 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4521 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4522 sprintf(name,"%s.%d",outfile,rank); 4523 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4524 ierr = PetscFree(name);CHKERRQ(ierr); 4525 ierr = MatView(B,out);CHKERRQ(ierr); 4526 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4527 ierr = MatDestroy(&B);CHKERRQ(ierr); 4528 PetscFunctionReturn(0); 4529 } 4530 4531 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4532 { 4533 PetscErrorCode ierr; 4534 Mat_Merge_SeqsToMPI *merge; 4535 PetscContainer container; 4536 4537 PetscFunctionBegin; 4538 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4539 if (container) { 4540 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4541 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4542 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4543 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4544 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4545 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4546 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4550 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4551 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4553 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4554 ierr = PetscFree(merge);CHKERRQ(ierr); 4555 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4556 } 4557 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4558 PetscFunctionReturn(0); 4559 } 4560 4561 #include <../src/mat/utils/freespace.h> 4562 #include <petscbt.h> 4563 4564 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4565 { 4566 PetscErrorCode ierr; 4567 MPI_Comm 
comm; 4568 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4569 PetscMPIInt size,rank,taga,*len_s; 4570 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4571 PetscInt proc,m; 4572 PetscInt **buf_ri,**buf_rj; 4573 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4574 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4575 MPI_Request *s_waits,*r_waits; 4576 MPI_Status *status; 4577 MatScalar *aa=a->a; 4578 MatScalar **abuf_r,*ba_i; 4579 Mat_Merge_SeqsToMPI *merge; 4580 PetscContainer container; 4581 4582 PetscFunctionBegin; 4583 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4584 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4585 4586 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4587 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4588 4589 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4590 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4591 4592 bi = merge->bi; 4593 bj = merge->bj; 4594 buf_ri = merge->buf_ri; 4595 buf_rj = merge->buf_rj; 4596 4597 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4598 owners = merge->rowmap->range; 4599 len_s = merge->len_s; 4600 4601 /* send and recv matrix values */ 4602 /*-----------------------------*/ 4603 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4604 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4605 4606 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4607 for (proc=0,k=0; proc<size; proc++) { 4608 if (!len_s[proc]) continue; 4609 i = owners[proc]; 4610 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4611 k++; 4612 } 4613 4614 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4615 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4616 ierr = PetscFree(status);CHKERRQ(ierr); 4617 4618 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4619 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4620 4621 /* insert mat values of mpimat */ 4622 /*----------------------------*/ 4623 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4624 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4625 4626 for (k=0; k<merge->nrecv; k++) { 4627 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4628 nrows = *(buf_ri_k[k]); 4629 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4630 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4631 } 4632 4633 /* set values of ba */ 4634 m = merge->rowmap->n; 4635 for (i=0; i<m; i++) { 4636 arow = owners[rank] + i; 4637 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4638 bnzi = bi[i+1] - bi[i]; 4639 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4640 4641 /* add local non-zero vals of this proc's seqmat into ba */ 4642 anzi = ai[arow+1] - ai[arow]; 4643 aj = a->j + ai[arow]; 4644 aa = a->a + ai[arow]; 4645 nextaj = 0; 4646 for (j=0; nextaj<anzi; j++) { 4647 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4648 ba_i[j] += aa[nextaj++]; 4649 } 4650 } 4651 4652 /* add received vals into ba */ 4653 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4654 /* i-th row */ 4655 if (i == *nextrow[k]) { 4656 anzi = *(nextai[k]+1) - *nextai[k]; 4657 aj = buf_rj[k] + *(nextai[k]); 4658 aa = abuf_r[k] + *(nextai[k]); 4659 nextaj = 0; 4660 for 
(j=0; nextaj<anzi; j++) { 4661 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4662 ba_i[j] += aa[nextaj++]; 4663 } 4664 } 4665 nextrow[k]++; nextai[k]++; 4666 } 4667 } 4668 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4669 } 4670 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4671 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4672 4673 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4674 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4675 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4676 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4677 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4678 PetscFunctionReturn(0); 4679 } 4680 4681 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4682 { 4683 PetscErrorCode ierr; 4684 Mat B_mpi; 4685 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4686 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4687 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4688 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4689 PetscInt len,proc,*dnz,*onz,bs,cbs; 4690 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4691 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4692 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4693 MPI_Status *status; 4694 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4695 PetscBT lnkbt; 4696 Mat_Merge_SeqsToMPI *merge; 4697 PetscContainer container; 4698 4699 PetscFunctionBegin; 4700 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4701 4702 /* make sure it is a PETSc comm */ 4703 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4704 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4705 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4706 4707 ierr = PetscNew(&merge);CHKERRQ(ierr); 4708 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4709 4710 /* determine row ownership */ 4711 /*---------------------------------------------------------*/ 4712 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4713 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4714 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4715 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4716 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4717 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4718 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4719 4720 m = merge->rowmap->n; 4721 owners = merge->rowmap->range; 4722 4723 /* determine the number of messages to send, their lengths */ 4724 /*---------------------------------------------------------*/ 4725 len_s = merge->len_s; 4726 4727 len = 0; /* length of buf_si[] */ 4728 merge->nsend = 0; 4729 for (proc=0; proc<size; proc++) { 4730 len_si[proc] = 0; 4731 if (proc == rank) { 4732 len_s[proc] = 0; 4733 } else { 4734 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4735 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4736 } 4737 if (len_s[proc]) { 4738 merge->nsend++; 4739 nrows = 0; 4740 for (i=owners[proc]; i<owners[proc+1]; i++) { 4741 if (ai[i+1] > ai[i]) nrows++; 4742 } 4743 len_si[proc] = 2*(nrows+1); 4744 len += len_si[proc]; 4745 } 4746 } 4747 4748 /* determine the number and length of messages to receive for ij-structure */ 4749 /*-------------------------------------------------------------------------*/ 4750 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 
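  /* merge->nrecv now holds the number of ranks that will send row data to this rank;
     next obtain, from each of those ranks, the lengths of both the j-array message
     (merge->len_r) and the compressed i-structure message (len_ri) */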
4751 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4752 4753 /* post the Irecv of j-structure */ 4754 /*-------------------------------*/ 4755 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4756 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4757 4758 /* post the Isend of j-structure */ 4759 /*--------------------------------*/ 4760 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4761 4762 for (proc=0, k=0; proc<size; proc++) { 4763 if (!len_s[proc]) continue; 4764 i = owners[proc]; 4765 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4766 k++; 4767 } 4768 4769 /* receives and sends of j-structure are complete */ 4770 /*------------------------------------------------*/ 4771 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4772 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4773 4774 /* send and recv i-structure */ 4775 /*---------------------------*/ 4776 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4777 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4778 4779 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4780 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4781 for (proc=0,k=0; proc<size; proc++) { 4782 if (!len_s[proc]) continue; 4783 /* form outgoing message for i-structure: 4784 buf_si[0]: nrows to be sent 4785 [1:nrows]: row index (global) 4786 [nrows+1:2*nrows+1]: i-structure index 4787 */ 4788 /*-------------------------------------------*/ 4789 nrows = len_si[proc]/2 - 1; 4790 buf_si_i = buf_si + nrows+1; 4791 buf_si[0] = nrows; 4792 buf_si_i[0] = 0; 4793 nrows = 0; 4794 for (i=owners[proc]; i<owners[proc+1]; i++) { 4795 anzi = ai[i+1] - ai[i]; 4796 if (anzi) { 4797 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4798 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4799 nrows++; 4800 } 4801 } 4802 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4803 k++; 4804 buf_si += len_si[proc]; 4805 } 4806 4807 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4808 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4809 4810 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4811 for (i=0; i<merge->nrecv; i++) { 4812 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4813 } 4814 4815 ierr = PetscFree(len_si);CHKERRQ(ierr); 4816 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4817 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4818 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4819 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4820 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4821 ierr = PetscFree(status);CHKERRQ(ierr); 4822 4823 /* compute a local seq matrix in each processor */ 4824 /*----------------------------------------------*/ 4825 /* allocate bi array and free space for accumulating nonzero column info */ 4826 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4827 bi[0] = 0; 4828 4829 /* create and initialize a linked list */ 4830 nlnk = N+1; 4831 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4832 4833 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4834 len = 
ai[owners[rank+1]] - ai[owners[rank]]; 4835 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4836 4837 current_space = free_space; 4838 4839 /* determine symbolic info for each local row */ 4840 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4841 4842 for (k=0; k<merge->nrecv; k++) { 4843 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4844 nrows = *buf_ri_k[k]; 4845 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4846 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4847 } 4848 4849 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4850 len = 0; 4851 for (i=0; i<m; i++) { 4852 bnzi = 0; 4853 /* add local non-zero cols of this proc's seqmat into lnk */ 4854 arow = owners[rank] + i; 4855 anzi = ai[arow+1] - ai[arow]; 4856 aj = a->j + ai[arow]; 4857 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4858 bnzi += nlnk; 4859 /* add received col data into lnk */ 4860 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4861 if (i == *nextrow[k]) { /* i-th row */ 4862 anzi = *(nextai[k]+1) - *nextai[k]; 4863 aj = buf_rj[k] + *nextai[k]; 4864 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4865 bnzi += nlnk; 4866 nextrow[k]++; nextai[k]++; 4867 } 4868 } 4869 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4870 4871 /* if free space is not available, make more free space */ 4872 if (current_space->local_remaining<bnzi) { 4873 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4874 nspacedouble++; 4875 } 4876 /* copy data into free space, then initialize lnk */ 4877 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4878 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4879 4880 current_space->array += bnzi; 4881 current_space->local_used += bnzi; 4882 current_space->local_remaining -= bnzi; 4883 4884 bi[i+1] = bi[i] + bnzi; 4885 } 4886 4887 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4888 4889 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4890 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4891 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4892 4893 /* create symbolic parallel matrix B_mpi */ 4894 /*---------------------------------------*/ 4895 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4896 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4897 if (n==PETSC_DECIDE) { 4898 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4899 } else { 4900 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4901 } 4902 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4903 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4904 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4905 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4906 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4907 4908 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4909 B_mpi->assembled = PETSC_FALSE; 4910 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4911 merge->bi = bi; 4912 merge->bj = bj; 4913 merge->buf_ri = buf_ri; 4914 merge->buf_rj = buf_rj; 4915 merge->coi = NULL; 4916 merge->coj = NULL; 4917 merge->owners_co = NULL; 4918 4919 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4920 4921 
/* attach the supporting struct to B_mpi for reuse */ 4922 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4923 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4924 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4925 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4926 *mpimat = B_mpi; 4927 4928 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4929 PetscFunctionReturn(0); 4930 } 4931 4932 /*@C 4933 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4934 matrices from each processor 4935 4936 Collective on MPI_Comm 4937 4938 Input Parameters: 4939 + comm - the communicators the parallel matrix will live on 4940 . seqmat - the input sequential matrices 4941 . m - number of local rows (or PETSC_DECIDE) 4942 . n - number of local columns (or PETSC_DECIDE) 4943 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4944 4945 Output Parameter: 4946 . mpimat - the parallel matrix generated 4947 4948 Level: advanced 4949 4950 Notes: 4951 The dimensions of the sequential matrix in each processor MUST be the same. 4952 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4953 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4954 @*/ 4955 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4956 { 4957 PetscErrorCode ierr; 4958 PetscMPIInt size; 4959 4960 PetscFunctionBegin; 4961 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4962 if (size == 1) { 4963 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4964 if (scall == MAT_INITIAL_MATRIX) { 4965 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4966 } else { 4967 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4968 } 4969 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4970 PetscFunctionReturn(0); 4971 } 4972 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4973 if (scall == MAT_INITIAL_MATRIX) { 4974 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4975 } 4976 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4977 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4978 PetscFunctionReturn(0); 4979 } 4980 4981 /*@ 4982 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4983 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4984 with MatGetSize() 4985 4986 Not Collective 4987 4988 Input Parameters: 4989 + A - the matrix 4990 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4991 4992 Output Parameter: 4993 . 
A_loc - the local sequential matrix generated 4994 4995 Level: developer 4996 4997 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4998 4999 @*/ 5000 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5001 { 5002 PetscErrorCode ierr; 5003 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5004 Mat_SeqAIJ *mat,*a,*b; 5005 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5006 MatScalar *aa,*ba,*cam; 5007 PetscScalar *ca; 5008 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5009 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5010 PetscBool match; 5011 MPI_Comm comm; 5012 PetscMPIInt size; 5013 5014 PetscFunctionBegin; 5015 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5016 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5017 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5018 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5019 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5020 5021 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5022 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5023 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5024 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5025 aa = a->a; ba = b->a; 5026 if (scall == MAT_INITIAL_MATRIX) { 5027 if (size == 1) { 5028 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5029 PetscFunctionReturn(0); 5030 } 5031 5032 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5033 ci[0] = 0; 5034 for (i=0; i<am; i++) { 5035 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5036 } 5037 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5038 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5039 k = 0; 5040 for (i=0; i<am; i++) { 5041 ncols_o = bi[i+1] - bi[i]; 5042 ncols_d = ai[i+1] - ai[i]; 5043 /* off-diagonal portion of A */ 5044 for (jo=0; jo<ncols_o; jo++) { 5045 col = cmap[*bj]; 5046 if (col >= cstart) break; 5047 cj[k] = col; bj++; 5048 ca[k++] = *ba++; 5049 } 5050 /* diagonal portion of A */ 5051 for (j=0; j<ncols_d; j++) { 5052 cj[k] = cstart + *aj++; 5053 ca[k++] = *aa++; 5054 } 5055 /* off-diagonal portion of A */ 5056 for (j=jo; j<ncols_o; j++) { 5057 cj[k] = cmap[*bj++]; 5058 ca[k++] = *ba++; 5059 } 5060 } 5061 /* put together the new matrix */ 5062 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5063 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5064 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5065 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5066 mat->free_a = PETSC_TRUE; 5067 mat->free_ij = PETSC_TRUE; 5068 mat->nonew = 0; 5069 } else if (scall == MAT_REUSE_MATRIX) { 5070 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5071 ci = mat->i; cj = mat->j; cam = mat->a; 5072 for (i=0; i<am; i++) { 5073 /* off-diagonal portion of A */ 5074 ncols_o = bi[i+1] - bi[i]; 5075 for (jo=0; jo<ncols_o; jo++) { 5076 col = cmap[*bj]; 5077 if (col >= cstart) break; 5078 *cam++ = *ba++; bj++; 5079 } 5080 /* diagonal portion of A */ 5081 ncols_d = ai[i+1] - ai[i]; 5082 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5083 /* off-diagonal portion of A */ 5084 for (j=jo; j<ncols_o; j++) { 5085 *cam++ = *ba++; bj++; 5086 } 5087 } 5088 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5089 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5090 PetscFunctionReturn(0); 5091 } 5092 5093 /*@C 5094 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5095 5096 Not Collective 5097 5098 Input Parameters: 5099 + A - the matrix 5100 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5101 - row, col - index sets of rows and columns to extract (or NULL) 5102 5103 Output Parameter: 5104 . A_loc - the local sequential matrix generated 5105 5106 Level: developer 5107 5108 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5109 5110 @*/ 5111 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5112 { 5113 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5114 PetscErrorCode ierr; 5115 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5116 IS isrowa,iscola; 5117 Mat *aloc; 5118 PetscBool match; 5119 5120 PetscFunctionBegin; 5121 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5122 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5123 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5124 if (!row) { 5125 start = A->rmap->rstart; end = A->rmap->rend; 5126 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5127 } else { 5128 isrowa = *row; 5129 } 5130 if (!col) { 5131 start = A->cmap->rstart; 5132 cmap = a->garray; 5133 nzA = a->A->cmap->n; 5134 nzB = a->B->cmap->n; 5135 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5136 ncols = 0; 5137 for (i=0; i<nzB; i++) { 5138 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5139 else break; 5140 } 5141 imark = i; 5142 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5143 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5144 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5145 } else { 5146 iscola = *col; 5147 } 5148 if (scall != MAT_INITIAL_MATRIX) { 5149 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5150 aloc[0] = *A_loc; 5151 } 5152 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5153 if (!col) { /* attach global id of condensed columns */ 5154 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5155 } 5156 *A_loc = aloc[0]; 5157 ierr = PetscFree(aloc);CHKERRQ(ierr); 5158 if (!row) { 5159 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5160 } 5161 if (!col) { 5162 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5163 } 5164 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5165 PetscFunctionReturn(0); 5166 } 5167 5168 /*@C 5169 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5170 5171 Collective on Mat 5172 5173 Input Parameters: 5174 + A,B - the matrices in mpiaij format 5175 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5176 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5177 5178 Output Parameter: 5179 + rowb, colb - index sets of rows and columns of B to extract 5180 - B_seq - the sequential matrix generated 5181 5182 Level: developer 5183 5184 @*/ 5185 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5186 { 5187 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5188 PetscErrorCode ierr; 5189 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5190 IS isrowb,iscolb; 5191 Mat *bseq=NULL; 5192 5193 PetscFunctionBegin; 5194 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5195 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5196 } 5197 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5198 5199 if (scall == MAT_INITIAL_MATRIX) { 5200 start = A->cmap->rstart; 5201 cmap = a->garray; 5202 nzA = a->A->cmap->n; 5203 nzB = a->B->cmap->n; 5204 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5205 ncols = 0; 5206 for (i=0; i<nzB; i++) { /* row < local row index */ 5207 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5208 else break; 5209 } 5210 imark = i; 5211 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5212 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5213 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5214 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5215 } else { 5216 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5217 isrowb = *rowb; iscolb = *colb; 5218 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5219 bseq[0] = *B_seq; 5220 } 5221 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5222 *B_seq = bseq[0]; 5223 ierr = PetscFree(bseq);CHKERRQ(ierr); 5224 if (!rowb) { 5225 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5226 } else { 5227 *rowb = isrowb; 5228 } 5229 if (!colb) { 5230 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5231 } else { 5232 *colb = iscolb; 5233 } 5234 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5235 PetscFunctionReturn(0); 5236 } 5237 5238 #include <petsc/private/vecscatterimpl.h> 5239 /* 5240 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5241 of the OFF-DIAGONAL portion of local A 5242 5243 Collective on Mat 5244 5245 Input Parameters: 5246 + A,B - the matrices in mpiaij format 5247 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5248 5249 Output Parameter: 5250 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5251 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5252 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5253 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5254 5255 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5256 for this matrix. This is not desirable.. 
5257 5258 Level: developer 5259 5260 */ 5261 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5262 { 5263 VecScatter_MPI_General *gen_to,*gen_from; 5264 PetscErrorCode ierr; 5265 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5266 Mat_SeqAIJ *b_oth; 5267 VecScatter ctx; 5268 MPI_Comm comm; 5269 PetscMPIInt *rprocs,*sprocs,tag,rank; 5270 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5271 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5272 PetscScalar *b_otha,*bufa,*bufA,*vals; 5273 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5274 MPI_Request *rwaits = NULL,*swaits = NULL; 5275 MPI_Status *sstatus,rstatus; 5276 PetscMPIInt jj,size; 5277 VecScatterType type; 5278 PetscBool mpi1; 5279 5280 PetscFunctionBegin; 5281 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5282 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5283 5284 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5285 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5286 } 5287 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5288 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5289 5290 if (size == 1) { 5291 startsj_s = NULL; 5292 bufa_ptr = NULL; 5293 *B_oth = NULL; 5294 PetscFunctionReturn(0); 5295 } 5296 5297 ctx = a->Mvctx; 5298 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5299 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5300 if (!mpi1) { 5301 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5302 thus create a->Mvctx_mpi1 */ 5303 if (!a->Mvctx_mpi1) { 5304 a->Mvctx_mpi1_flg = PETSC_TRUE; 5305 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5306 } 5307 ctx = a->Mvctx_mpi1; 5308 } 5309 tag = ((PetscObject)ctx)->tag; 5310 5311 gen_to = (VecScatter_MPI_General*)ctx->todata; 5312 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5313 nrecvs = gen_from->n; 5314 nsends = gen_to->n; 5315 5316 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5317 srow = gen_to->indices; /* local row index to be sent */ 5318 sstarts = gen_to->starts; 5319 sprocs = gen_to->procs; 5320 sstatus = gen_to->sstatus; 5321 sbs = gen_to->bs; 5322 rstarts = gen_from->starts; 5323 rprocs = gen_from->procs; 5324 rbs = gen_from->bs; 5325 5326 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5327 if (scall == MAT_INITIAL_MATRIX) { 5328 /* i-array */ 5329 /*---------*/ 5330 /* post receives */ 5331 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5332 for (i=0; i<nrecvs; i++) { 5333 rowlen = rvalues + rstarts[i]*rbs; 5334 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5335 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5336 } 5337 5338 /* pack the outgoing message */ 5339 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5340 5341 sstartsj[0] = 0; 5342 rstartsj[0] = 0; 5343 len = 0; /* total length of j or a array to be sent */ 5344 k = 0; 5345 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5346 for (i=0; i<nsends; i++) { 5347 rowlen = svalues + sstarts[i]*sbs; 5348 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5349 for (j=0; j<nrows; j++) { 5350 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5351 for 
(l=0; l<sbs; l++) { 5352 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5353 5354 rowlen[j*sbs+l] = ncols; 5355 5356 len += ncols; 5357 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5358 } 5359 k++; 5360 } 5361 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5362 5363 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5364 } 5365 /* recvs and sends of i-array are completed */ 5366 i = nrecvs; 5367 while (i--) { 5368 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5369 } 5370 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5371 ierr = PetscFree(svalues);CHKERRQ(ierr); 5372 5373 /* allocate buffers for sending j and a arrays */ 5374 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5375 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5376 5377 /* create i-array of B_oth */ 5378 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5379 5380 b_othi[0] = 0; 5381 len = 0; /* total length of j or a array to be received */ 5382 k = 0; 5383 for (i=0; i<nrecvs; i++) { 5384 rowlen = rvalues + rstarts[i]*rbs; 5385 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5386 for (j=0; j<nrows; j++) { 5387 b_othi[k+1] = b_othi[k] + rowlen[j]; 5388 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5389 k++; 5390 } 5391 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5392 } 5393 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5394 5395 /* allocate space for j and a arrrays of B_oth */ 5396 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5397 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5398 5399 /* j-array */ 5400 /*---------*/ 5401 /* post receives of j-array */ 5402 for (i=0; i<nrecvs; i++) { 5403 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5404 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5405 } 5406 5407 /* pack the outgoing message j-array */ 5408 k = 0; 5409 for (i=0; i<nsends; i++) { 5410 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5411 bufJ = bufj+sstartsj[i]; 5412 for (j=0; j<nrows; j++) { 5413 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5414 for (ll=0; ll<sbs; ll++) { 5415 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5416 for (l=0; l<ncols; l++) { 5417 *bufJ++ = cols[l]; 5418 } 5419 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5420 } 5421 } 5422 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5423 } 5424 5425 /* recvs and sends of j-array are completed */ 5426 i = nrecvs; 5427 while (i--) { 5428 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5429 } 5430 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5431 } else if (scall == MAT_REUSE_MATRIX) { 5432 sstartsj = *startsj_s; 5433 rstartsj = *startsj_r; 5434 bufa = *bufa_ptr; 5435 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5436 b_otha = b_oth->a; 5437 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5438 5439 /* a-array */ 5440 /*---------*/ 5441 /* post receives of a-array */ 5442 for (i=0; i<nrecvs; i++) { 5443 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5444 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5445 } 5446 5447 /* pack the 
outgoing message a-array */ 5448 k = 0; 5449 for (i=0; i<nsends; i++) { 5450 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5451 bufA = bufa+sstartsj[i]; 5452 for (j=0; j<nrows; j++) { 5453 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5454 for (ll=0; ll<sbs; ll++) { 5455 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5456 for (l=0; l<ncols; l++) { 5457 *bufA++ = vals[l]; 5458 } 5459 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5460 } 5461 } 5462 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5463 } 5464 /* recvs and sends of a-array are completed */ 5465 i = nrecvs; 5466 while (i--) { 5467 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5468 } 5469 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5470 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5471 5472 if (scall == MAT_INITIAL_MATRIX) { 5473 /* put together the new matrix */ 5474 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5475 5476 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5477 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5478 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5479 b_oth->free_a = PETSC_TRUE; 5480 b_oth->free_ij = PETSC_TRUE; 5481 b_oth->nonew = 0; 5482 5483 ierr = PetscFree(bufj);CHKERRQ(ierr); 5484 if (!startsj_s || !bufa_ptr) { 5485 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5486 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5487 } else { 5488 *startsj_s = sstartsj; 5489 *startsj_r = rstartsj; 5490 *bufa_ptr = bufa; 5491 } 5492 } 5493 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5494 PetscFunctionReturn(0); 5495 } 5496 5497 /*@C 5498 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5499 5500 Not Collective 5501 5502 Input Parameters: 5503 . A - The matrix in mpiaij format 5504 5505 Output Parameter: 5506 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5507 . 
colmap - A map from global column index to local index into lvec 5508 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5509 5510 Level: developer 5511 5512 @*/ 5513 #if defined(PETSC_USE_CTABLE) 5514 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5515 #else 5516 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5517 #endif 5518 { 5519 Mat_MPIAIJ *a; 5520 5521 PetscFunctionBegin; 5522 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5523 PetscValidPointer(lvec, 2); 5524 PetscValidPointer(colmap, 3); 5525 PetscValidPointer(multScatter, 4); 5526 a = (Mat_MPIAIJ*) A->data; 5527 if (lvec) *lvec = a->lvec; 5528 if (colmap) *colmap = a->colmap; 5529 if (multScatter) *multScatter = a->Mvctx; 5530 PetscFunctionReturn(0); 5531 } 5532 5533 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5534 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5535 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5536 #if defined(PETSC_HAVE_MKL_SPARSE) 5537 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5538 #endif 5539 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5540 #if defined(PETSC_HAVE_ELEMENTAL) 5541 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5542 #endif 5543 #if defined(PETSC_HAVE_HYPRE) 5544 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5545 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5546 #endif 5547 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5548 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5549 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5550 5551 /* 5552 Computes (B'*A')' since computing B*A directly is untenable 5553 5554 n p p 5555 ( ) ( ) ( ) 5556 m ( A ) * n ( B ) = m ( C ) 5557 ( ) ( ) ( ) 5558 5559 */ 5560 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5561 { 5562 PetscErrorCode ierr; 5563 Mat At,Bt,Ct; 5564 5565 PetscFunctionBegin; 5566 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5567 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5568 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5569 ierr = MatDestroy(&At);CHKERRQ(ierr); 5570 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5571 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5572 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5573 PetscFunctionReturn(0); 5574 } 5575 5576 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5577 { 5578 PetscErrorCode ierr; 5579 PetscInt m=A->rmap->n,n=B->cmap->n; 5580 Mat Cmat; 5581 5582 PetscFunctionBegin; 5583 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5584 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5585 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5586 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5587 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5588 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5589 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5590 ierr = 
MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5591 5592 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5593 5594 *C = Cmat; 5595 PetscFunctionReturn(0); 5596 } 5597 5598 /* ----------------------------------------------------------------*/ 5599 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5600 { 5601 PetscErrorCode ierr; 5602 5603 PetscFunctionBegin; 5604 if (scall == MAT_INITIAL_MATRIX) { 5605 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5606 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5607 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5608 } 5609 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5610 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5611 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5612 PetscFunctionReturn(0); 5613 } 5614 5615 /*MC 5616 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5617 5618 Options Database Keys: 5619 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5620 5621 Level: beginner 5622 5623 .seealso: MatCreateAIJ() 5624 M*/ 5625 5626 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5627 { 5628 Mat_MPIAIJ *b; 5629 PetscErrorCode ierr; 5630 PetscMPIInt size; 5631 5632 PetscFunctionBegin; 5633 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5634 5635 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5636 B->data = (void*)b; 5637 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5638 B->assembled = PETSC_FALSE; 5639 B->insertmode = NOT_SET_VALUES; 5640 b->size = size; 5641 5642 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5643 5644 /* build cache for off array entries formed */ 5645 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5646 5647 b->donotstash = PETSC_FALSE; 5648 b->colmap = 0; 5649 b->garray = 0; 5650 b->roworiented = PETSC_TRUE; 5651 5652 /* stuff used for matrix vector multiply */ 5653 b->lvec = NULL; 5654 b->Mvctx = NULL; 5655 5656 /* stuff for MatGetRow() */ 5657 b->rowindices = 0; 5658 b->rowvalues = 0; 5659 b->getrowactive = PETSC_FALSE; 5660 5661 /* flexible pointer used in CUSP/CUSPARSE classes */ 5662 b->spptr = NULL; 5663 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5672 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5674 #if defined(PETSC_HAVE_MKL_SPARSE) 5675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5676 #endif 5677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5679 #if defined(PETSC_HAVE_ELEMENTAL) 5680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5681 #endif 5682 #if defined(PETSC_HAVE_HYPRE) 5683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5684 #endif 5685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5690 #if defined(PETSC_HAVE_HYPRE) 5691 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5692 #endif 5693 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5694 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5695 PetscFunctionReturn(0); 5696 } 5697 5698 /*@C 5699 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5700 and "off-diagonal" part of the matrix in CSR format. 5701 5702 Collective on MPI_Comm 5703 5704 Input Parameters: 5705 + comm - MPI communicator 5706 . m - number of local rows (Cannot be PETSC_DECIDE) 5707 . n - This value should be the same as the local size used in creating the 5708 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5709 calculated if N is given) For square matrices n is almost always m. 5710 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5711 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5712 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5713 . j - column indices 5714 . a - matrix values 5715 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5716 . oj - column indices 5717 - oa - matrix values 5718 5719 Output Parameter: 5720 . mat - the matrix 5721 5722 Level: advanced 5723 5724 Notes: 5725 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5726 must free the arrays once the matrix has been destroyed and not before. 
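   As a sketch of the expected layout (derived from how this routine builds the two
   sequential blocks below: column indices in j are local to the diagonal block, while
   column indices in oj are global), the 3x3 matrix

.vb
        1 0 0
   P0   2 0 3
        -----
   P1   4 5 6
.ve

   split over two processes, with P0 owning rows 0-1 and columns 0-1 and P1 owning
   row 2 and column 2, would be passed as

.vb
   P0:   i = {0,1,2}   j = {0,0}   a = {1,2}      oi = {0,0,1}  oj = {2}    oa = {3}
   P1:   i = {0,1}     j = {0}     a = {6}        oi = {0,2}    oj = {0,1}  oa = {4,5}
.ve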
5727 5728 The i and j indices are 0 based 5729 5730 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5731 5732 This sets local rows and cannot be used to set off-processor values. 5733 5734 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5735 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5736 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5737 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5738 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5739 communication if it is known that only local entries will be set. 5740 5741 .keywords: matrix, aij, compressed row, sparse, parallel 5742 5743 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5744 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5745 @*/ 5746 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5747 { 5748 PetscErrorCode ierr; 5749 Mat_MPIAIJ *maij; 5750 5751 PetscFunctionBegin; 5752 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5753 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5754 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5755 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5756 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5757 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5758 maij = (Mat_MPIAIJ*) (*mat)->data; 5759 5760 (*mat)->preallocated = PETSC_TRUE; 5761 5762 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5763 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5764 5765 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5766 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5767 5768 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5769 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5770 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5771 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5772 5773 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5774 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5775 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5776 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5777 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5778 PetscFunctionReturn(0); 5779 } 5780 5781 /* 5782 Special version for direct calls from Fortran 5783 */ 5784 #include <petsc/private/fortranimpl.h> 5785 5786 /* Change these macros so can be used in void function */ 5787 #undef CHKERRQ 5788 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5789 #undef SETERRQ2 5790 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5791 #undef SETERRQ3 5792 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5793 #undef SETERRQ 5794 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5795 
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   note that ba must be refreshed before ap2 is recomputed from it */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
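
/*
   The notes for MatCreateMPIAIJWithSplitArrays() above recommend assembling with MatSetValues() and
   MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) when every process generates only its own rows.
   The sketch below, again kept under "#if 0" and using an illustrative helper name and a 4x4 1-D
   Laplacian, shows that recommended pattern under these assumptions; it is not part of the library.
*/
#if 0
static PetscErrorCode ExampleAssembleWithMatSetValues(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       r,rstart,rend,cols[3],N = 4;
  PetscScalar    vals[3];

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(*A,3,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,3,NULL,1,NULL);CHKERRQ(ierr);
  /* every process sets only rows it owns, so the parallel stash communication can be skipped */
  ierr = MatSetOption(*A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (r=rstart; r<rend; r++) {
    PetscInt nc = 0;
    if (r > 0)   {cols[nc] = r-1; vals[nc] = -1.0; nc++;}
    cols[nc] = r; vals[nc] = 2.0; nc++;
    if (r < N-1) {cols[nc] = r+1; vals[nc] = -1.0; nc++;}
    ierr = MatSetValues(*A,1,&r,nc,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif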