1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/vecscatterimpl.h> 6 #include <petsc/private/isimpl.h> 7 #include <petscblaslapack.h> 8 #include <petscsf.h> 9 10 /*MC 11 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 12 13 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 14 and MATMPIAIJ otherwise. As a result, for single process communicators, 15 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 16 for communicators controlling multiple processes. It is recommended that you call both of 17 the above preallocation routines for simplicity. 18 19 Options Database Keys: 20 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 21 22 Developer Notes: 23 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 24 enough exist. 25 26 Level: beginner 27 28 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 29 M*/ 30 31 /*MC 32 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 33 34 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 35 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 36 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 37 for communicators controlling multiple processes. It is recommended that you call both of 38 the above preallocation routines for simplicity. 39 40 Options Database Keys: 41 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 42 43 Level: beginner 44 45 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 46 M*/ 47 48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 49 { 50 PetscErrorCode ierr; 51 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 52 53 PetscFunctionBegin; 54 if (mat->A) { 55 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 56 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 57 } 58 PetscFunctionReturn(0); 59 } 60 61 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 62 { 63 PetscErrorCode ierr; 64 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 65 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 66 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 67 const PetscInt *ia,*ib; 68 const MatScalar *aa,*bb; 69 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 70 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 71 72 PetscFunctionBegin; 73 *keptrows = 0; 74 ia = a->i; 75 ib = b->i; 76 for (i=0; i<m; i++) { 77 na = ia[i+1] - ia[i]; 78 nb = ib[i+1] - ib[i]; 79 if (!na && !nb) { 80 cnt++; 81 goto ok1; 82 } 83 aa = a->a + ia[i]; 84 for (j=0; j<na; j++) { 85 if (aa[j] != 0.0) goto ok1; 86 } 87 bb = b->a + ib[i]; 88 for (j=0; j <nb; j++) { 89 if (bb[j] != 0.0) goto ok1; 90 } 91 cnt++; 92 ok1:; 93 } 94 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 95 if (!n0rows) PetscFunctionReturn(0); 96 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 97 cnt = 0; 98 for (i=0; i<m; i++) { 99 na = ia[i+1] - ia[i]; 100 nb = ib[i+1] - ib[i]; 101 if (!na && !nb) continue; 102 aa = a->a + ia[i]; 103 for (j=0; j<na;j++) { 104 if (aa[j] != 0.0) { 105 rows[cnt++] = rstart + i; 106 goto ok2; 107 } 108 } 109 bb = b->a + ib[i]; 110 for (j=0; j<nb; j++) { 111 if (bb[j] != 0.0) { 112 rows[cnt++] = rstart + i; 113 goto ok2; 114 } 115 } 116 ok2:; 117 } 118 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 123 { 124 PetscErrorCode ierr; 125 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 126 PetscBool cong; 127 128 PetscFunctionBegin; 129 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 130 if (Y->assembled && cong) { 131 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 132 } else { 133 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 134 } 135 PetscFunctionReturn(0); 136 } 137 138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 139 { 140 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 141 PetscErrorCode ierr; 142 PetscInt i,rstart,nrows,*rows; 143 144 PetscFunctionBegin; 145 *zrows = NULL; 146 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 147 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 148 for (i=0; i<nrows; i++) rows[i] += rstart; 149 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 150 PetscFunctionReturn(0); 151 } 152 153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 154 { 155 PetscErrorCode ierr; 156 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 157 PetscInt i,n,*garray = aij->garray; 158 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 159 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 160 PetscReal *work; 161 162 PetscFunctionBegin; 163 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 164 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 165 if (type == NORM_2) { 166 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 167 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 168 } 169 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 170 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 171 } 172 } else if (type == NORM_1) { 173 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 174 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 175 } 176 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 177 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 178 } 179 } else if (type == NORM_INFINITY) { 180 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 181 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 182 } 183 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 184 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 185 } 186 187 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 188 if (type == NORM_INFINITY) { 189 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 190 } else { 191 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 192 } 193 ierr = PetscFree(work);CHKERRQ(ierr); 194 if (type == NORM_2) { 195 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 201 { 202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 203 IS sis,gis; 204 PetscErrorCode ierr; 205 const PetscInt *isis,*igis; 206 PetscInt n,*iis,nsis,ngis,rstart,i; 207 208 PetscFunctionBegin; 209 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 210 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 211 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 212 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 213 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 214 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 215 216 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 218 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 219 n = ngis + nsis; 220 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 221 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 222 for (i=0; i<n; i++) iis[i] += rstart; 223 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 224 225 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 226 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 227 ierr = ISDestroy(&sis);CHKERRQ(ierr); 228 ierr = ISDestroy(&gis);CHKERRQ(ierr); 229 PetscFunctionReturn(0); 230 } 231 232 /* 233 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 234 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 235 236 Only for square matrices 237 238 Used by a preconditioner, hence PETSC_EXTERN 239 */ 240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 241 { 242 PetscMPIInt rank,size; 243 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 244 PetscErrorCode ierr; 245 Mat mat; 246 Mat_SeqAIJ *gmata; 247 PetscMPIInt tag; 248 MPI_Status status; 249 PetscBool aij; 250 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 251 252 PetscFunctionBegin; 253 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 254 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 255 if (!rank) { 256 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 257 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 258 } 259 if (reuse == MAT_INITIAL_MATRIX) { 260 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 261 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 262 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 263 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 264 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 265 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 266 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 267 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 268 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 269 270 rowners[0] = 0; 271 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 272 rstart = rowners[rank]; 273 rend = rowners[rank+1]; 274 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 275 if (!rank) { 276 gmata = (Mat_SeqAIJ*) gmat->data; 277 /* send row lengths to all processors */ 278 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 279 for (i=1; i<size; i++) { 280 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 281 } 282 /* determine number diagonal and off-diagonal counts */ 283 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 284 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 285 jj = 0; 286 for (i=0; i<m; i++) { 287 for (j=0; j<dlens[i]; j++) { 288 if (gmata->j[jj] < rstart) ld[i]++; 289 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 290 jj++; 291 } 292 } 293 /* send column indices to other processes */ 294 for (i=1; i<size; i++) { 295 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 296 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 298 } 299 300 /* send numerical values to other processes */ 301 for (i=1; i<size; i++) { 302 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 303 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 304 } 305 gmataa = gmata->a; 306 gmataj = gmata->j; 307 308 } else { 309 /* receive row lengths */ 310 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* receive column indices */ 312 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 313 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 314 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 315 /* determine number diagonal and off-diagonal counts */ 316 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 317 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 318 jj = 0; 319 for (i=0; i<m; i++) { 320 for (j=0; j<dlens[i]; j++) { 321 if (gmataj[jj] < rstart) ld[i]++; 322 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 323 jj++; 324 } 325 } 326 /* receive numerical values */ 327 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 328 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 329 } 330 /* set preallocation */ 331 for (i=0; i<m; i++) { 332 dlens[i] -= olens[i]; 333 } 334 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 335 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 336 337 for (i=0; i<m; i++) { 338 dlens[i] += olens[i]; 339 } 340 cnt = 0; 341 for (i=0; i<m; i++) { 342 row = rstart + i; 343 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 344 cnt += dlens[i]; 345 } 346 if (rank) { 347 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 348 } 349 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 350 ierr = PetscFree(rowners);CHKERRQ(ierr); 351 352 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 353 354 *inmat = mat; 355 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 356 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 357 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 358 mat = *inmat; 359 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 360 if (!rank) { 361 /* send numerical values to other processes */ 362 gmata = (Mat_SeqAIJ*) gmat->data; 363 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 364 gmataa = gmata->a; 365 for (i=1; i<size; i++) { 366 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 367 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 368 } 369 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 370 } else { 371 /* receive numerical values from process 0*/ 372 nz = Ad->nz + Ao->nz; 373 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 374 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 375 } 376 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 377 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 378 ad = Ad->a; 379 ao = Ao->a; 380 if (mat->rmap->n) { 381 i = 0; 382 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 for (i=1; i<mat->rmap->n; i++) { 386 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 387 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 388 } 389 i--; 390 if (mat->rmap->n) { 391 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 392 } 393 if (rank) { 394 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 395 } 396 } 397 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 399 PetscFunctionReturn(0); 400 } 401 402 /* 403 Local utility routine that creates a mapping from the global column 404 number to the local number in the off-diagonal part of the local 405 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 406 a slightly higher hash table cost; without it it is not scalable (each processor 407 has an order N integer array but is fast to acess. 408 */ 409 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 412 PetscErrorCode ierr; 413 PetscInt n = aij->B->cmap->n,i; 414 415 PetscFunctionBegin; 416 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 417 #if defined(PETSC_USE_CTABLE) 418 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 419 for (i=0; i<n; i++) { 420 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 421 } 422 #else 423 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 424 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 425 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 426 #endif 427 PetscFunctionReturn(0); 428 } 429 430 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 431 { \ 432 if (col <= lastcol1) low1 = 0; \ 433 else high1 = nrow1; \ 434 lastcol1 = col;\ 435 while (high1-low1 > 5) { \ 436 t = (low1+high1)/2; \ 437 if (rp1[t] > col) high1 = t; \ 438 else low1 = t; \ 439 } \ 440 for (_i=low1; _i<high1; _i++) { \ 441 if (rp1[_i] > col) break; \ 442 if (rp1[_i] == col) { \ 443 if (addv == ADD_VALUES) ap1[_i] += value; \ 444 else ap1[_i] = value; \ 445 goto a_noinsert; \ 446 } \ 447 } \ 448 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 449 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 450 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 451 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 452 N = nrow1++ - 1; a->nz++; high1++; \ 453 /* shift up all the later entries in this row */ \ 454 for (ii=N; ii>=_i; ii--) { \ 455 rp1[ii+1] = rp1[ii]; \ 456 ap1[ii+1] = ap1[ii]; \ 457 } \ 458 rp1[_i] = col; \ 459 ap1[_i] = value; \ 460 A->nonzerostate++;\ 461 a_noinsert: ; \ 462 ailen[row] = nrow1; \ 463 } 464 465 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 466 { \ 467 if (col <= lastcol2) low2 = 0; \ 468 else high2 = nrow2; \ 469 lastcol2 = col; \ 470 while (high2-low2 > 5) { \ 471 t = (low2+high2)/2; \ 472 if (rp2[t] > col) high2 = t; \ 473 else low2 = t; \ 474 } \ 475 for (_i=low2; _i<high2; _i++) { \ 476 if (rp2[_i] > col) break; \ 477 if (rp2[_i] == col) { \ 478 if (addv == ADD_VALUES) ap2[_i] += value; \ 479 else ap2[_i] = value; \ 480 goto b_noinsert; \ 481 } \ 482 } \ 483 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 485 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 486 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 487 N = nrow2++ - 1; b->nz++; high2++; \ 488 /* shift up all the later entries in this row */ \ 489 for (ii=N; ii>=_i; ii--) { \ 490 rp2[ii+1] = rp2[ii]; \ 491 ap2[ii+1] = ap2[ii]; \ 492 } \ 493 rp2[_i] = col; \ 494 ap2[_i] = value; \ 495 B->nonzerostate++; \ 496 b_noinsert: ; \ 497 bilen[row] = nrow2; \ 498 } 499 500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 501 { 502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 504 PetscErrorCode ierr; 505 PetscInt l,*garray = mat->garray,diag; 506 507 PetscFunctionBegin; 508 /* code only works for square matrices A */ 509 510 /* find size of row to the left of the diagonal part */ 511 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 512 row = row - diag; 513 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 514 if (garray[b->j[b->i[row]+l]] > diag) break; 515 } 516 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* diagonal part */ 519 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 520 521 /* right of diagonal part */ 522 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 527 { 528 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 529 PetscScalar value; 530 PetscErrorCode ierr; 531 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 532 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 533 PetscBool roworiented = aij->roworiented; 534 535 /* Some Variables required in the macro */ 536 Mat A = aij->A; 537 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 538 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 539 MatScalar *aa = a->a; 540 PetscBool ignorezeroentries = a->ignorezeroentries; 541 Mat B = aij->B; 542 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 543 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 544 MatScalar *ba = b->a; 545 546 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 547 PetscInt nonew; 548 MatScalar *ap1,*ap2; 549 550 PetscFunctionBegin; 551 for (i=0; i<m; i++) { 552 if (im[i] < 0) continue; 553 #if defined(PETSC_USE_DEBUG) 554 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 555 #endif 556 if (im[i] >= rstart && im[i] < rend) { 557 row = im[i] - rstart; 558 lastcol1 = -1; 559 rp1 = aj + ai[row]; 560 ap1 = aa + ai[row]; 561 rmax1 = aimax[row]; 562 nrow1 = ailen[row]; 563 low1 = 0; 564 high1 = nrow1; 565 lastcol2 = -1; 566 rp2 = bj + bi[row]; 567 ap2 = ba + bi[row]; 568 rmax2 = bimax[row]; 569 nrow2 = bilen[row]; 570 low2 = 0; 571 high2 = nrow2; 572 573 for (j=0; j<n; j++) { 574 if (roworiented) value = v[i*n+j]; 575 else value = v[i+j*m]; 576 if (in[j] >= cstart && in[j] < cend) { 577 col = in[j] - cstart; 578 nonew = a->nonew; 579 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 580 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 581 } else if (in[j] < 0) continue; 582 #if defined(PETSC_USE_DEBUG) 583 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 584 #endif 585 else { 586 if (mat->was_assembled) { 587 if (!aij->colmap) { 588 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 589 } 590 #if defined(PETSC_USE_CTABLE) 591 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 592 col--; 593 #else 594 col = aij->colmap[in[j]] - 1; 595 #endif 596 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 597 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 598 col = in[j]; 599 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 600 B = aij->B; 601 b = (Mat_SeqAIJ*)B->data; 602 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 603 rp2 = bj + bi[row]; 604 ap2 = ba + bi[row]; 605 rmax2 = bimax[row]; 606 nrow2 = bilen[row]; 607 low2 = 0; 608 high2 = nrow2; 609 bm = aij->B->rmap->n; 610 ba = b->a; 611 } else if (col < 0) { 612 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 613 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 614 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 615 } 616 } else col = in[j]; 617 nonew = b->nonew; 618 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 619 } 620 } 621 } else { 622 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 623 if (!aij->donotstash) { 624 mat->assembled = PETSC_FALSE; 625 if (roworiented) { 626 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 627 } else { 628 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 629 } 630 } 631 } 632 } 633 PetscFunctionReturn(0); 634 } 635 636 /* 637 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 638 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 639 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 640 */ 641 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 642 { 643 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 644 Mat A = aij->A; /* diagonal part of the matrix */ 645 Mat B = aij->B; /* offdiagonal part of the matrix */ 646 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 647 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 648 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 649 PetscInt *ailen = a->ilen,*aj = a->j; 650 PetscInt *bilen = b->ilen,*bj = b->j; 651 PetscInt am = aij->A->rmap->n,j; 652 PetscInt diag_so_far = 0,dnz; 653 PetscInt offd_so_far = 0,onz; 654 655 PetscFunctionBegin; 656 /* Iterate over all rows of the matrix */ 657 for (j=0; j<am; j++) { 658 dnz = onz = 0; 659 /* Iterate over all non-zero columns of the current row */ 660 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 661 /* If column is in the diagonal */ 662 if (mat_j[col] >= cstart && mat_j[col] < cend) { 663 aj[diag_so_far++] = mat_j[col] - cstart; 664 dnz++; 665 } else { /* off-diagonal entries */ 666 bj[offd_so_far++] = mat_j[col]; 667 onz++; 668 } 669 } 670 ailen[j] = dnz; 671 bilen[j] = onz; 672 } 673 PetscFunctionReturn(0); 674 } 675 676 /* 677 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 678 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 679 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 680 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 681 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 682 */ 683 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 684 { 685 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 686 Mat A = aij->A; /* diagonal part of the matrix */ 687 Mat B = aij->B; /* offdiagonal part of the matrix */ 688 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 689 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 690 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 691 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 692 PetscInt *ailen = a->ilen,*aj = a->j; 693 PetscInt *bilen = b->ilen,*bj = b->j; 694 PetscInt am = aij->A->rmap->n,j; 695 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 696 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 697 PetscScalar *aa = a->a,*ba = b->a; 698 699 PetscFunctionBegin; 700 /* Iterate over all rows of the matrix */ 701 for (j=0; j<am; j++) { 702 dnz_row = onz_row = 0; 703 rowstart_offd = full_offd_i[j]; 704 rowstart_diag = full_diag_i[j]; 705 /* Iterate over all non-zero columns of the current row */ 706 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 707 /* If column is in the diagonal */ 708 if (mat_j[col] >= cstart && mat_j[col] < cend) { 709 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 710 aa[rowstart_diag+dnz_row] = mat_a[col]; 711 dnz_row++; 712 } else { /* off-diagonal entries */ 713 bj[rowstart_offd+onz_row] = mat_j[col]; 714 ba[rowstart_offd+onz_row] = mat_a[col]; 715 onz_row++; 716 } 717 } 718 ailen[j] = dnz_row; 719 bilen[j] = onz_row; 720 } 721 PetscFunctionReturn(0); 722 } 723 724 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 725 { 726 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 727 PetscErrorCode ierr; 728 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 729 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 730 731 PetscFunctionBegin; 732 for (i=0; i<m; i++) { 733 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 734 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 735 if (idxm[i] >= rstart && idxm[i] < rend) { 736 row = idxm[i] - rstart; 737 for (j=0; j<n; j++) { 738 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 739 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 740 if (idxn[j] >= cstart && idxn[j] < cend) { 741 col = idxn[j] - cstart; 742 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 743 } else { 744 if (!aij->colmap) { 745 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 746 } 747 #if defined(PETSC_USE_CTABLE) 748 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 749 col--; 750 #else 751 col = aij->colmap[idxn[j]] - 1; 752 #endif 753 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 754 else { 755 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 756 } 757 } 758 } 759 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 760 } 761 PetscFunctionReturn(0); 762 } 763 764 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 765 766 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 767 { 768 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 769 PetscErrorCode ierr; 770 PetscInt nstash,reallocs; 771 772 PetscFunctionBegin; 773 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 774 775 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 776 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 777 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 782 { 783 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 784 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 785 PetscErrorCode ierr; 786 PetscMPIInt n; 787 PetscInt i,j,rstart,ncols,flg; 788 PetscInt *row,*col; 789 PetscBool other_disassembled; 790 PetscScalar *val; 791 792 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 793 794 PetscFunctionBegin; 795 if (!aij->donotstash && !mat->nooffprocentries) { 796 while (1) { 797 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 798 if (!flg) break; 799 800 for (i=0; i<n; ) { 801 /* Now identify the consecutive vals belonging to the same row */ 802 for (j=i,rstart=row[j]; j<n; j++) { 803 if (row[j] != rstart) break; 804 } 805 if (j < n) ncols = j-i; 806 else ncols = n-i; 807 /* Now assemble all these values with a single function call */ 808 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 809 810 i = j; 811 } 812 } 813 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 814 } 815 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 816 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 817 818 /* determine if any processor has disassembled, if so we must 819 also disassemble ourselfs, in order that we may reassemble. */ 820 /* 821 if nonzero structure of submatrix B cannot change then we know that 822 no processor disassembled thus we can skip this stuff 823 */ 824 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 825 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 826 if (mat->was_assembled && !other_disassembled) { 827 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 828 } 829 } 830 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 831 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 832 } 833 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 834 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 835 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 836 837 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 838 839 aij->rowvalues = 0; 840 841 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 842 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 843 844 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 845 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 846 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 847 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 848 } 849 PetscFunctionReturn(0); 850 } 851 852 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 853 { 854 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 855 PetscErrorCode ierr; 856 857 PetscFunctionBegin; 858 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 859 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 860 PetscFunctionReturn(0); 861 } 862 863 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 864 { 865 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 866 PetscInt *lrows; 867 PetscInt r, len; 868 PetscBool cong; 869 PetscErrorCode ierr; 870 871 PetscFunctionBegin; 872 /* get locally owned rows */ 873 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 874 /* fix right hand side if needed */ 875 if (x && b) { 876 const PetscScalar *xx; 877 PetscScalar *bb; 878 879 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 880 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 881 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 882 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 883 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 884 } 885 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 886 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 887 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 888 if ((diag != 0.0) && cong) { 889 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 890 } else if (diag != 0.0) { 891 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 892 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 893 for (r = 0; r < len; ++r) { 894 const PetscInt row = lrows[r] + A->rmap->rstart; 895 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 896 } 897 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 898 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 899 } else { 900 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 901 } 902 ierr = PetscFree(lrows);CHKERRQ(ierr); 903 904 /* only change matrix nonzero state if pattern was allowed to be changed */ 905 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 906 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 907 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 908 } 909 PetscFunctionReturn(0); 910 } 911 912 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 913 { 914 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 915 PetscErrorCode ierr; 916 PetscMPIInt n = A->rmap->n; 917 PetscInt i,j,r,m,p = 0,len = 0; 918 PetscInt *lrows,*owners = A->rmap->range; 919 PetscSFNode *rrows; 920 PetscSF sf; 921 const PetscScalar *xx; 922 PetscScalar *bb,*mask; 923 Vec xmask,lmask; 924 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 925 const PetscInt *aj, *ii,*ridx; 926 PetscScalar *aa; 927 928 PetscFunctionBegin; 929 /* Create SF where leaves are input rows and roots are owned rows */ 930 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 931 for (r = 0; r < n; ++r) lrows[r] = -1; 932 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 933 for (r = 0; r < N; ++r) { 934 const PetscInt idx = rows[r]; 935 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 936 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 937 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 938 } 939 rrows[r].rank = p; 940 rrows[r].index = rows[r] - owners[p]; 941 } 942 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 943 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 944 /* Collect flags for rows to be zeroed */ 945 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 946 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 947 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 948 /* Compress and put in row numbers */ 949 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 950 /* zero diagonal part of matrix */ 951 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 952 /* handle off diagonal part of matrix */ 953 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 954 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 955 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 956 for (i=0; i<len; i++) bb[lrows[i]] = 1; 957 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 958 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 960 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 961 if (x) { 962 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 964 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 965 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 966 } 967 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 968 /* remove zeroed rows of off diagonal matrix */ 969 ii = aij->i; 970 for (i=0; i<len; i++) { 971 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 972 } 973 /* loop over all elements of off process part of matrix zeroing removed columns*/ 974 if (aij->compressedrow.use) { 975 m = aij->compressedrow.nrows; 976 ii = aij->compressedrow.i; 977 ridx = aij->compressedrow.rindex; 978 for (i=0; i<m; i++) { 979 n = ii[i+1] - ii[i]; 980 aj = aij->j + ii[i]; 981 aa = aij->a + ii[i]; 982 983 for (j=0; j<n; j++) { 984 if (PetscAbsScalar(mask[*aj])) { 985 if (b) bb[*ridx] -= *aa*xx[*aj]; 986 *aa = 0.0; 987 } 988 aa++; 989 aj++; 990 } 991 ridx++; 992 } 993 } else { /* do not use compressed row format */ 994 m = l->B->rmap->n; 995 for (i=0; i<m; i++) { 996 n = ii[i+1] - ii[i]; 997 aj = aij->j + ii[i]; 998 aa = aij->a + ii[i]; 999 for (j=0; j<n; j++) { 1000 if (PetscAbsScalar(mask[*aj])) { 1001 if (b) bb[i] -= *aa*xx[*aj]; 1002 *aa = 0.0; 1003 } 1004 aa++; 1005 aj++; 1006 } 1007 } 1008 } 1009 if (x) { 1010 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1011 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 } 1013 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1014 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1015 ierr = PetscFree(lrows);CHKERRQ(ierr); 1016 1017 /* only change matrix nonzero state if pattern was allowed to be changed */ 1018 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1019 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1020 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1021 } 1022 PetscFunctionReturn(0); 1023 } 1024 1025 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1026 { 1027 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1028 PetscErrorCode ierr; 1029 PetscInt nt; 1030 VecScatter Mvctx = a->Mvctx; 1031 1032 PetscFunctionBegin; 1033 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1034 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1035 1036 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1038 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1039 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1040 PetscFunctionReturn(0); 1041 } 1042 1043 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1044 { 1045 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1046 PetscErrorCode ierr; 1047 1048 PetscFunctionBegin; 1049 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1050 PetscFunctionReturn(0); 1051 } 1052 1053 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1054 { 1055 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1056 PetscErrorCode ierr; 1057 VecScatter Mvctx = a->Mvctx; 1058 1059 PetscFunctionBegin; 1060 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1061 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1062 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1063 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1064 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1065 PetscFunctionReturn(0); 1066 } 1067 1068 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1071 PetscErrorCode ierr; 1072 1073 PetscFunctionBegin; 1074 /* do nondiagonal part */ 1075 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1076 /* do local part */ 1077 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1078 /* add partial results together */ 1079 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1080 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1085 { 1086 MPI_Comm comm; 1087 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1088 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1089 IS Me,Notme; 1090 PetscErrorCode ierr; 1091 PetscInt M,N,first,last,*notme,i; 1092 PetscBool lf; 1093 PetscMPIInt size; 1094 1095 PetscFunctionBegin; 1096 /* Easy test: symmetric diagonal block */ 1097 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1098 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1099 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1100 if (!*f) PetscFunctionReturn(0); 1101 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1102 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1103 if (size == 1) PetscFunctionReturn(0); 1104 1105 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1106 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1107 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1108 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1109 for (i=0; i<first; i++) notme[i] = i; 1110 for (i=last; i<M; i++) notme[i-last+first] = i; 1111 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1112 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1113 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1114 Aoff = Aoffs[0]; 1115 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1116 Boff = Boffs[0]; 1117 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1118 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1119 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1120 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1121 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1122 ierr = PetscFree(notme);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1127 { 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1136 { 1137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1138 PetscErrorCode ierr; 1139 1140 PetscFunctionBegin; 1141 /* do nondiagonal part */ 1142 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1143 /* do local part */ 1144 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1145 /* add partial results together */ 1146 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1147 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 /* 1152 This only works correctly for square matrices where the subblock A->A is the 1153 diagonal block 1154 */ 1155 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1156 { 1157 PetscErrorCode ierr; 1158 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1159 1160 PetscFunctionBegin; 1161 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1162 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1163 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1168 { 1169 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1170 PetscErrorCode ierr; 1171 1172 PetscFunctionBegin; 1173 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1174 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1179 { 1180 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1181 PetscErrorCode ierr; 1182 1183 PetscFunctionBegin; 1184 #if defined(PETSC_USE_LOG) 1185 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1186 #endif 1187 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1188 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1189 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1190 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1191 #if defined(PETSC_USE_CTABLE) 1192 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1193 #else 1194 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1195 #endif 1196 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1197 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1198 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1199 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1200 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1201 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1202 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1203 1204 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1205 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1206 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1207 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1208 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1209 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1210 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1211 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1212 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1213 #if defined(PETSC_HAVE_ELEMENTAL) 1214 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1215 #endif 1216 #if defined(PETSC_HAVE_HYPRE) 1217 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1219 #endif 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1222 PetscFunctionReturn(0); 1223 } 1224 1225 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1226 { 1227 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1228 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1229 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1230 PetscErrorCode ierr; 1231 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1232 int fd; 1233 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1234 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1235 PetscScalar *column_values; 1236 PetscInt message_count,flowcontrolcount; 1237 FILE *file; 1238 1239 PetscFunctionBegin; 1240 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1241 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1242 nz = A->nz + B->nz; 1243 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1244 if (!rank) { 1245 header[0] = MAT_FILE_CLASSID; 1246 header[1] = mat->rmap->N; 1247 header[2] = mat->cmap->N; 1248 1249 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1250 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1251 /* get largest number of rows any processor has */ 1252 rlen = mat->rmap->n; 1253 range = mat->rmap->range; 1254 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1255 } else { 1256 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1257 rlen = mat->rmap->n; 1258 } 1259 1260 /* load up the local row counts */ 1261 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1262 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1263 1264 /* store the row lengths to the file */ 1265 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1266 if (!rank) { 1267 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1268 for (i=1; i<size; i++) { 1269 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1270 rlen = range[i+1] - range[i]; 1271 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1272 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1273 } 1274 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1275 } else { 1276 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1277 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1278 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1279 } 1280 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1281 1282 /* load up the local column indices */ 1283 nzmax = nz; /* th processor needs space a largest processor needs */ 1284 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1285 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1286 cnt = 0; 1287 for (i=0; i<mat->rmap->n; i++) { 1288 for (j=B->i[i]; j<B->i[i+1]; j++) { 1289 if ((col = garray[B->j[j]]) > cstart) break; 1290 column_indices[cnt++] = col; 1291 } 1292 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1293 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1294 } 1295 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1296 1297 /* store the column indices to the file */ 1298 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1299 if (!rank) { 1300 MPI_Status status; 1301 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1302 for (i=1; i<size; i++) { 1303 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1304 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1305 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1306 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1307 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1308 } 1309 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1310 } else { 1311 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1312 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1313 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1314 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1315 } 1316 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1317 1318 /* load up the local column values */ 1319 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1320 cnt = 0; 1321 for (i=0; i<mat->rmap->n; i++) { 1322 for (j=B->i[i]; j<B->i[i+1]; j++) { 1323 if (garray[B->j[j]] > cstart) break; 1324 column_values[cnt++] = B->a[j]; 1325 } 1326 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1327 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1328 } 1329 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1330 1331 /* store the column values to the file */ 1332 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1333 if (!rank) { 1334 MPI_Status status; 1335 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1336 for (i=1; i<size; i++) { 1337 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1338 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1339 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1340 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1341 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1342 } 1343 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1344 } else { 1345 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1346 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1347 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1348 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1349 } 1350 ierr = PetscFree(column_values);CHKERRQ(ierr); 1351 1352 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1353 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #include <petscdraw.h> 1358 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1359 { 1360 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1361 PetscErrorCode ierr; 1362 PetscMPIInt rank = aij->rank,size = aij->size; 1363 PetscBool isdraw,iascii,isbinary; 1364 PetscViewer sviewer; 1365 PetscViewerFormat format; 1366 1367 PetscFunctionBegin; 1368 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1369 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1370 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1371 if (iascii) { 1372 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1373 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1374 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1375 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1376 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1377 for (i=0; i<(PetscInt)size; i++) { 1378 nmax = PetscMax(nmax,nz[i]); 1379 nmin = PetscMin(nmin,nz[i]); 1380 navg += nz[i]; 1381 } 1382 ierr = PetscFree(nz);CHKERRQ(ierr); 1383 navg = navg/size; 1384 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1388 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1389 MatInfo info; 1390 PetscBool inodes; 1391 1392 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1393 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1394 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1395 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1396 if (!inodes) { 1397 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1398 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1399 } else { 1400 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1401 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1402 } 1403 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1404 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1405 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1406 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1407 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1408 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1409 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1410 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1413 PetscInt inodecount,inodelimit,*inodes; 1414 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1415 if (inodes) { 1416 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1417 } else { 1418 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1419 } 1420 PetscFunctionReturn(0); 1421 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1422 PetscFunctionReturn(0); 1423 } 1424 } else if (isbinary) { 1425 if (size == 1) { 1426 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1427 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1428 } else { 1429 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1430 } 1431 PetscFunctionReturn(0); 1432 } else if (isdraw) { 1433 PetscDraw draw; 1434 PetscBool isnull; 1435 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1436 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1437 if (isnull) PetscFunctionReturn(0); 1438 } 1439 1440 { 1441 /* assemble the entire matrix onto first processor. */ 1442 Mat A; 1443 Mat_SeqAIJ *Aloc; 1444 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1445 MatScalar *a; 1446 1447 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1448 if (!rank) { 1449 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1450 } else { 1451 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1452 } 1453 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1454 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1455 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1456 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1457 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1458 1459 /* copy over the A part */ 1460 Aloc = (Mat_SeqAIJ*)aij->A->data; 1461 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1462 row = mat->rmap->rstart; 1463 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1464 for (i=0; i<m; i++) { 1465 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1466 row++; 1467 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1468 } 1469 aj = Aloc->j; 1470 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1471 1472 /* copy over the B part */ 1473 Aloc = (Mat_SeqAIJ*)aij->B->data; 1474 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1475 row = mat->rmap->rstart; 1476 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1477 ct = cols; 1478 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1479 for (i=0; i<m; i++) { 1480 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1481 row++; 1482 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1483 } 1484 ierr = PetscFree(ct);CHKERRQ(ierr); 1485 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1486 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1487 /* 1488 Everyone has to call to draw the matrix since the graphics waits are 1489 synchronized across all processors that share the PetscDraw object 1490 */ 1491 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1492 if (!rank) { 1493 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1494 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1495 } 1496 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1497 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1498 ierr = MatDestroy(&A);CHKERRQ(ierr); 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1504 { 1505 PetscErrorCode ierr; 1506 PetscBool iascii,isdraw,issocket,isbinary; 1507 1508 PetscFunctionBegin; 1509 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1510 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1511 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1512 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1513 if (iascii || isdraw || isbinary || issocket) { 1514 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1515 } 1516 PetscFunctionReturn(0); 1517 } 1518 1519 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1520 { 1521 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1522 PetscErrorCode ierr; 1523 Vec bb1 = 0; 1524 PetscBool hasop; 1525 1526 PetscFunctionBegin; 1527 if (flag == SOR_APPLY_UPPER) { 1528 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1533 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1534 } 1535 1536 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1537 if (flag & SOR_ZERO_INITIAL_GUESS) { 1538 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1539 its--; 1540 } 1541 1542 while (its--) { 1543 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1544 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 1546 /* update rhs: bb1 = bb - B*x */ 1547 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1548 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1549 1550 /* local sweep */ 1551 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1552 } 1553 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1554 if (flag & SOR_ZERO_INITIAL_GUESS) { 1555 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1556 its--; 1557 } 1558 while (its--) { 1559 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1560 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1561 1562 /* update rhs: bb1 = bb - B*x */ 1563 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1564 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1565 1566 /* local sweep */ 1567 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1568 } 1569 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1570 if (flag & SOR_ZERO_INITIAL_GUESS) { 1571 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1572 its--; 1573 } 1574 while (its--) { 1575 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1576 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1577 1578 /* update rhs: bb1 = bb - B*x */ 1579 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1580 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1581 1582 /* local sweep */ 1583 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1584 } 1585 } else if (flag & SOR_EISENSTAT) { 1586 Vec xx1; 1587 1588 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1589 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1590 1591 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 if (!mat->diag) { 1594 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1595 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1596 } 1597 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1598 if (hasop) { 1599 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1600 } else { 1601 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1602 } 1603 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1604 1605 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1606 1607 /* local sweep */ 1608 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1609 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1610 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1611 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1612 1613 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1614 1615 matin->factorerrortype = mat->A->factorerrortype; 1616 PetscFunctionReturn(0); 1617 } 1618 1619 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1620 { 1621 Mat aA,aB,Aperm; 1622 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1623 PetscScalar *aa,*ba; 1624 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1625 PetscSF rowsf,sf; 1626 IS parcolp = NULL; 1627 PetscBool done; 1628 PetscErrorCode ierr; 1629 1630 PetscFunctionBegin; 1631 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1632 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1633 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1634 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1635 1636 /* Invert row permutation to find out where my rows should go */ 1637 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1638 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1639 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1640 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1641 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1642 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1643 1644 /* Invert column permutation to find out where my columns should go */ 1645 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1646 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1647 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1648 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1649 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1650 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1651 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1652 1653 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1654 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1655 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1656 1657 /* Find out where my gcols should go */ 1658 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1659 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1660 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1661 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1662 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1663 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1664 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1665 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1666 1667 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1668 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1669 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1670 for (i=0; i<m; i++) { 1671 PetscInt row = rdest[i],rowner; 1672 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1673 for (j=ai[i]; j<ai[i+1]; j++) { 1674 PetscInt cowner,col = cdest[aj[j]]; 1675 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1676 if (rowner == cowner) dnnz[i]++; 1677 else onnz[i]++; 1678 } 1679 for (j=bi[i]; j<bi[i+1]; j++) { 1680 PetscInt cowner,col = gcdest[bj[j]]; 1681 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1682 if (rowner == cowner) dnnz[i]++; 1683 else onnz[i]++; 1684 } 1685 } 1686 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1687 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1688 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1689 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1690 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1691 1692 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1693 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1694 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1695 for (i=0; i<m; i++) { 1696 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1697 PetscInt j0,rowlen; 1698 rowlen = ai[i+1] - ai[i]; 1699 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1700 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1701 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1702 } 1703 rowlen = bi[i+1] - bi[i]; 1704 for (j0=j=0; j<rowlen; j0=j) { 1705 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1706 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1707 } 1708 } 1709 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1710 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1711 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1712 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1713 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1714 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1715 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1716 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1717 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1718 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1719 *B = Aperm; 1720 PetscFunctionReturn(0); 1721 } 1722 1723 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1724 { 1725 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1726 PetscErrorCode ierr; 1727 1728 PetscFunctionBegin; 1729 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1730 if (ghosts) *ghosts = aij->garray; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1735 { 1736 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1737 Mat A = mat->A,B = mat->B; 1738 PetscErrorCode ierr; 1739 PetscReal isend[5],irecv[5]; 1740 1741 PetscFunctionBegin; 1742 info->block_size = 1.0; 1743 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1744 1745 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1746 isend[3] = info->memory; isend[4] = info->mallocs; 1747 1748 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1749 1750 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1751 isend[3] += info->memory; isend[4] += info->mallocs; 1752 if (flag == MAT_LOCAL) { 1753 info->nz_used = isend[0]; 1754 info->nz_allocated = isend[1]; 1755 info->nz_unneeded = isend[2]; 1756 info->memory = isend[3]; 1757 info->mallocs = isend[4]; 1758 } else if (flag == MAT_GLOBAL_MAX) { 1759 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1760 1761 info->nz_used = irecv[0]; 1762 info->nz_allocated = irecv[1]; 1763 info->nz_unneeded = irecv[2]; 1764 info->memory = irecv[3]; 1765 info->mallocs = irecv[4]; 1766 } else if (flag == MAT_GLOBAL_SUM) { 1767 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1768 1769 info->nz_used = irecv[0]; 1770 info->nz_allocated = irecv[1]; 1771 info->nz_unneeded = irecv[2]; 1772 info->memory = irecv[3]; 1773 info->mallocs = irecv[4]; 1774 } 1775 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1776 info->fill_ratio_needed = 0; 1777 info->factor_mallocs = 0; 1778 PetscFunctionReturn(0); 1779 } 1780 1781 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1782 { 1783 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1784 PetscErrorCode ierr; 1785 1786 PetscFunctionBegin; 1787 switch (op) { 1788 case MAT_NEW_NONZERO_LOCATIONS: 1789 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1790 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1791 case MAT_KEEP_NONZERO_PATTERN: 1792 case MAT_NEW_NONZERO_LOCATION_ERR: 1793 case MAT_USE_INODES: 1794 case MAT_IGNORE_ZERO_ENTRIES: 1795 MatCheckPreallocated(A,1); 1796 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1797 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1798 break; 1799 case MAT_ROW_ORIENTED: 1800 MatCheckPreallocated(A,1); 1801 a->roworiented = flg; 1802 1803 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1804 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1805 break; 1806 case MAT_NEW_DIAGONALS: 1807 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1808 break; 1809 case MAT_IGNORE_OFF_PROC_ENTRIES: 1810 a->donotstash = flg; 1811 break; 1812 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1813 case MAT_SPD: 1814 case MAT_SYMMETRIC: 1815 case MAT_STRUCTURALLY_SYMMETRIC: 1816 case MAT_HERMITIAN: 1817 case MAT_SYMMETRY_ETERNAL: 1818 break; 1819 case MAT_SUBMAT_SINGLEIS: 1820 A->submat_singleis = flg; 1821 break; 1822 case MAT_STRUCTURE_ONLY: 1823 /* The option is handled directly by MatSetOption() */ 1824 break; 1825 default: 1826 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1827 } 1828 PetscFunctionReturn(0); 1829 } 1830 1831 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1832 { 1833 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1834 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1835 PetscErrorCode ierr; 1836 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1837 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1838 PetscInt *cmap,*idx_p; 1839 1840 PetscFunctionBegin; 1841 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1842 mat->getrowactive = PETSC_TRUE; 1843 1844 if (!mat->rowvalues && (idx || v)) { 1845 /* 1846 allocate enough space to hold information from the longest row. 1847 */ 1848 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1849 PetscInt max = 1,tmp; 1850 for (i=0; i<matin->rmap->n; i++) { 1851 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1852 if (max < tmp) max = tmp; 1853 } 1854 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1855 } 1856 1857 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1858 lrow = row - rstart; 1859 1860 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1861 if (!v) {pvA = 0; pvB = 0;} 1862 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1863 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1864 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1865 nztot = nzA + nzB; 1866 1867 cmap = mat->garray; 1868 if (v || idx) { 1869 if (nztot) { 1870 /* Sort by increasing column numbers, assuming A and B already sorted */ 1871 PetscInt imark = -1; 1872 if (v) { 1873 *v = v_p = mat->rowvalues; 1874 for (i=0; i<nzB; i++) { 1875 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1876 else break; 1877 } 1878 imark = i; 1879 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1880 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1881 } 1882 if (idx) { 1883 *idx = idx_p = mat->rowindices; 1884 if (imark > -1) { 1885 for (i=0; i<imark; i++) { 1886 idx_p[i] = cmap[cworkB[i]]; 1887 } 1888 } else { 1889 for (i=0; i<nzB; i++) { 1890 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1891 else break; 1892 } 1893 imark = i; 1894 } 1895 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1896 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1897 } 1898 } else { 1899 if (idx) *idx = 0; 1900 if (v) *v = 0; 1901 } 1902 } 1903 *nz = nztot; 1904 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1905 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1910 { 1911 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1912 1913 PetscFunctionBegin; 1914 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1915 aij->getrowactive = PETSC_FALSE; 1916 PetscFunctionReturn(0); 1917 } 1918 1919 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1920 { 1921 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1922 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1923 PetscErrorCode ierr; 1924 PetscInt i,j,cstart = mat->cmap->rstart; 1925 PetscReal sum = 0.0; 1926 MatScalar *v; 1927 1928 PetscFunctionBegin; 1929 if (aij->size == 1) { 1930 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1931 } else { 1932 if (type == NORM_FROBENIUS) { 1933 v = amat->a; 1934 for (i=0; i<amat->nz; i++) { 1935 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1936 } 1937 v = bmat->a; 1938 for (i=0; i<bmat->nz; i++) { 1939 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1940 } 1941 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1942 *norm = PetscSqrtReal(*norm); 1943 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1944 } else if (type == NORM_1) { /* max column norm */ 1945 PetscReal *tmp,*tmp2; 1946 PetscInt *jj,*garray = aij->garray; 1947 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1948 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1949 *norm = 0.0; 1950 v = amat->a; jj = amat->j; 1951 for (j=0; j<amat->nz; j++) { 1952 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1953 } 1954 v = bmat->a; jj = bmat->j; 1955 for (j=0; j<bmat->nz; j++) { 1956 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1957 } 1958 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1959 for (j=0; j<mat->cmap->N; j++) { 1960 if (tmp2[j] > *norm) *norm = tmp2[j]; 1961 } 1962 ierr = PetscFree(tmp);CHKERRQ(ierr); 1963 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1964 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1965 } else if (type == NORM_INFINITY) { /* max row norm */ 1966 PetscReal ntemp = 0.0; 1967 for (j=0; j<aij->A->rmap->n; j++) { 1968 v = amat->a + amat->i[j]; 1969 sum = 0.0; 1970 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1971 sum += PetscAbsScalar(*v); v++; 1972 } 1973 v = bmat->a + bmat->i[j]; 1974 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1975 sum += PetscAbsScalar(*v); v++; 1976 } 1977 if (sum > ntemp) ntemp = sum; 1978 } 1979 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1980 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1981 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1982 } 1983 PetscFunctionReturn(0); 1984 } 1985 1986 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1987 { 1988 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1989 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1990 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 1991 PetscErrorCode ierr; 1992 Mat B,A_diag,*B_diag; 1993 MatScalar *array; 1994 1995 PetscFunctionBegin; 1996 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1997 ai = Aloc->i; aj = Aloc->j; 1998 bi = Bloc->i; bj = Bloc->j; 1999 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2000 PetscInt *d_nnz,*g_nnz,*o_nnz; 2001 PetscSFNode *oloc; 2002 PETSC_UNUSED PetscSF sf; 2003 2004 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2005 /* compute d_nnz for preallocation */ 2006 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2007 for (i=0; i<ai[ma]; i++) { 2008 d_nnz[aj[i]]++; 2009 } 2010 /* compute local off-diagonal contributions */ 2011 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2012 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2013 /* map those to global */ 2014 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2015 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2016 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2017 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2018 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2019 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2020 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2021 2022 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2023 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2024 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2025 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2026 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2027 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2028 } else { 2029 B = *matout; 2030 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2031 } 2032 2033 b = (Mat_MPIAIJ*)B->data; 2034 A_diag = a->A; 2035 B_diag = &b->A; 2036 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2037 A_diag_ncol = A_diag->cmap->N; 2038 B_diag_ilen = sub_B_diag->ilen; 2039 B_diag_i = sub_B_diag->i; 2040 2041 /* Set ilen for diagonal of B */ 2042 for (i=0; i<A_diag_ncol; i++) { 2043 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2044 } 2045 2046 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2047 very quickly (=without using MatSetValues), because all writes are local. */ 2048 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2049 2050 /* copy over the B part */ 2051 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2052 array = Bloc->a; 2053 row = A->rmap->rstart; 2054 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2055 cols_tmp = cols; 2056 for (i=0; i<mb; i++) { 2057 ncol = bi[i+1]-bi[i]; 2058 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2059 row++; 2060 array += ncol; cols_tmp += ncol; 2061 } 2062 ierr = PetscFree(cols);CHKERRQ(ierr); 2063 2064 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2065 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2066 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2067 *matout = B; 2068 } else { 2069 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073 2074 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2075 { 2076 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2077 Mat a = aij->A,b = aij->B; 2078 PetscErrorCode ierr; 2079 PetscInt s1,s2,s3; 2080 2081 PetscFunctionBegin; 2082 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2083 if (rr) { 2084 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2085 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2086 /* Overlap communication with computation. */ 2087 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2088 } 2089 if (ll) { 2090 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2091 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2092 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2093 } 2094 /* scale the diagonal block */ 2095 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2096 2097 if (rr) { 2098 /* Do a scatter end and then right scale the off-diagonal block */ 2099 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2100 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2101 } 2102 PetscFunctionReturn(0); 2103 } 2104 2105 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2106 { 2107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2108 PetscErrorCode ierr; 2109 2110 PetscFunctionBegin; 2111 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2112 PetscFunctionReturn(0); 2113 } 2114 2115 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2116 { 2117 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2118 Mat a,b,c,d; 2119 PetscBool flg; 2120 PetscErrorCode ierr; 2121 2122 PetscFunctionBegin; 2123 a = matA->A; b = matA->B; 2124 c = matB->A; d = matB->B; 2125 2126 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2127 if (flg) { 2128 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2129 } 2130 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2135 { 2136 PetscErrorCode ierr; 2137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2138 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2139 2140 PetscFunctionBegin; 2141 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2142 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2143 /* because of the column compression in the off-processor part of the matrix a->B, 2144 the number of columns in a->B and b->B may be different, hence we cannot call 2145 the MatCopy() directly on the two parts. If need be, we can provide a more 2146 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2147 then copying the submatrices */ 2148 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2149 } else { 2150 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2151 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2152 } 2153 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2154 PetscFunctionReturn(0); 2155 } 2156 2157 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2158 { 2159 PetscErrorCode ierr; 2160 2161 PetscFunctionBegin; 2162 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 /* 2167 Computes the number of nonzeros per row needed for preallocation when X and Y 2168 have different nonzero structure. 2169 */ 2170 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2171 { 2172 PetscInt i,j,k,nzx,nzy; 2173 2174 PetscFunctionBegin; 2175 /* Set the number of nonzeros in the new matrix */ 2176 for (i=0; i<m; i++) { 2177 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2178 nzx = xi[i+1] - xi[i]; 2179 nzy = yi[i+1] - yi[i]; 2180 nnz[i] = 0; 2181 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2182 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2183 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2184 nnz[i]++; 2185 } 2186 for (; k<nzy; k++) nnz[i]++; 2187 } 2188 PetscFunctionReturn(0); 2189 } 2190 2191 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2192 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2193 { 2194 PetscErrorCode ierr; 2195 PetscInt m = Y->rmap->N; 2196 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2197 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2198 2199 PetscFunctionBegin; 2200 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2201 PetscFunctionReturn(0); 2202 } 2203 2204 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2205 { 2206 PetscErrorCode ierr; 2207 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2208 PetscBLASInt bnz,one=1; 2209 Mat_SeqAIJ *x,*y; 2210 2211 PetscFunctionBegin; 2212 if (str == SAME_NONZERO_PATTERN) { 2213 PetscScalar alpha = a; 2214 x = (Mat_SeqAIJ*)xx->A->data; 2215 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2216 y = (Mat_SeqAIJ*)yy->A->data; 2217 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2218 x = (Mat_SeqAIJ*)xx->B->data; 2219 y = (Mat_SeqAIJ*)yy->B->data; 2220 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2221 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2222 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2223 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2224 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2225 } else { 2226 Mat B; 2227 PetscInt *nnz_d,*nnz_o; 2228 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2229 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2230 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2231 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2232 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2233 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2234 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2235 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2236 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2237 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2238 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2239 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2240 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2241 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2242 } 2243 PetscFunctionReturn(0); 2244 } 2245 2246 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2247 2248 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2249 { 2250 #if defined(PETSC_USE_COMPLEX) 2251 PetscErrorCode ierr; 2252 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2253 2254 PetscFunctionBegin; 2255 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2256 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2257 #else 2258 PetscFunctionBegin; 2259 #endif 2260 PetscFunctionReturn(0); 2261 } 2262 2263 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2264 { 2265 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2266 PetscErrorCode ierr; 2267 2268 PetscFunctionBegin; 2269 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2270 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2271 PetscFunctionReturn(0); 2272 } 2273 2274 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2275 { 2276 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2277 PetscErrorCode ierr; 2278 2279 PetscFunctionBegin; 2280 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2281 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2282 PetscFunctionReturn(0); 2283 } 2284 2285 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2286 { 2287 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2288 PetscErrorCode ierr; 2289 PetscInt i,*idxb = 0; 2290 PetscScalar *va,*vb; 2291 Vec vtmp; 2292 2293 PetscFunctionBegin; 2294 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2295 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2296 if (idx) { 2297 for (i=0; i<A->rmap->n; i++) { 2298 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2299 } 2300 } 2301 2302 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2303 if (idx) { 2304 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2305 } 2306 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2307 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2308 2309 for (i=0; i<A->rmap->n; i++) { 2310 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2311 va[i] = vb[i]; 2312 if (idx) idx[i] = a->garray[idxb[i]]; 2313 } 2314 } 2315 2316 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2317 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2318 ierr = PetscFree(idxb);CHKERRQ(ierr); 2319 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2320 PetscFunctionReturn(0); 2321 } 2322 2323 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2324 { 2325 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2326 PetscErrorCode ierr; 2327 PetscInt i,*idxb = 0; 2328 PetscScalar *va,*vb; 2329 Vec vtmp; 2330 2331 PetscFunctionBegin; 2332 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2333 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2334 if (idx) { 2335 for (i=0; i<A->cmap->n; i++) { 2336 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2337 } 2338 } 2339 2340 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2341 if (idx) { 2342 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2343 } 2344 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2345 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2346 2347 for (i=0; i<A->rmap->n; i++) { 2348 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2349 va[i] = vb[i]; 2350 if (idx) idx[i] = a->garray[idxb[i]]; 2351 } 2352 } 2353 2354 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2355 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2356 ierr = PetscFree(idxb);CHKERRQ(ierr); 2357 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2358 PetscFunctionReturn(0); 2359 } 2360 2361 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2362 { 2363 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2364 PetscInt n = A->rmap->n; 2365 PetscInt cstart = A->cmap->rstart; 2366 PetscInt *cmap = mat->garray; 2367 PetscInt *diagIdx, *offdiagIdx; 2368 Vec diagV, offdiagV; 2369 PetscScalar *a, *diagA, *offdiagA; 2370 PetscInt r; 2371 PetscErrorCode ierr; 2372 2373 PetscFunctionBegin; 2374 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2375 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2376 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2377 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2378 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2379 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2380 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2381 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2382 for (r = 0; r < n; ++r) { 2383 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2384 a[r] = diagA[r]; 2385 idx[r] = cstart + diagIdx[r]; 2386 } else { 2387 a[r] = offdiagA[r]; 2388 idx[r] = cmap[offdiagIdx[r]]; 2389 } 2390 } 2391 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2392 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2393 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2394 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2395 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2396 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2397 PetscFunctionReturn(0); 2398 } 2399 2400 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2401 { 2402 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2403 PetscInt n = A->rmap->n; 2404 PetscInt cstart = A->cmap->rstart; 2405 PetscInt *cmap = mat->garray; 2406 PetscInt *diagIdx, *offdiagIdx; 2407 Vec diagV, offdiagV; 2408 PetscScalar *a, *diagA, *offdiagA; 2409 PetscInt r; 2410 PetscErrorCode ierr; 2411 2412 PetscFunctionBegin; 2413 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2414 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2415 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2416 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2417 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2418 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2419 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2420 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2421 for (r = 0; r < n; ++r) { 2422 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2423 a[r] = diagA[r]; 2424 idx[r] = cstart + diagIdx[r]; 2425 } else { 2426 a[r] = offdiagA[r]; 2427 idx[r] = cmap[offdiagIdx[r]]; 2428 } 2429 } 2430 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2431 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2432 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2433 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2434 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2435 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2436 PetscFunctionReturn(0); 2437 } 2438 2439 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2440 { 2441 PetscErrorCode ierr; 2442 Mat *dummy; 2443 2444 PetscFunctionBegin; 2445 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2446 *newmat = *dummy; 2447 ierr = PetscFree(dummy);CHKERRQ(ierr); 2448 PetscFunctionReturn(0); 2449 } 2450 2451 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2452 { 2453 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2454 PetscErrorCode ierr; 2455 2456 PetscFunctionBegin; 2457 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2458 A->factorerrortype = a->A->factorerrortype; 2459 PetscFunctionReturn(0); 2460 } 2461 2462 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2463 { 2464 PetscErrorCode ierr; 2465 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2466 2467 PetscFunctionBegin; 2468 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2469 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2470 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2471 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2476 { 2477 PetscFunctionBegin; 2478 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2479 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2480 PetscFunctionReturn(0); 2481 } 2482 2483 /*@ 2484 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2485 2486 Collective on Mat 2487 2488 Input Parameters: 2489 + A - the matrix 2490 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2491 2492 Level: advanced 2493 2494 @*/ 2495 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2496 { 2497 PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2505 { 2506 PetscErrorCode ierr; 2507 PetscBool sc = PETSC_FALSE,flg; 2508 2509 PetscFunctionBegin; 2510 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2511 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2512 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2513 if (flg) { 2514 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2515 } 2516 ierr = PetscOptionsTail();CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2521 { 2522 PetscErrorCode ierr; 2523 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2524 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2525 2526 PetscFunctionBegin; 2527 if (!Y->preallocated) { 2528 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2529 } else if (!aij->nz) { 2530 PetscInt nonew = aij->nonew; 2531 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2532 aij->nonew = nonew; 2533 } 2534 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2539 { 2540 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2541 PetscErrorCode ierr; 2542 2543 PetscFunctionBegin; 2544 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2545 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2546 if (d) { 2547 PetscInt rstart; 2548 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2549 *d += rstart; 2550 2551 } 2552 PetscFunctionReturn(0); 2553 } 2554 2555 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2556 { 2557 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2558 PetscErrorCode ierr; 2559 2560 PetscFunctionBegin; 2561 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 /* -------------------------------------------------------------------*/ 2566 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2567 MatGetRow_MPIAIJ, 2568 MatRestoreRow_MPIAIJ, 2569 MatMult_MPIAIJ, 2570 /* 4*/ MatMultAdd_MPIAIJ, 2571 MatMultTranspose_MPIAIJ, 2572 MatMultTransposeAdd_MPIAIJ, 2573 0, 2574 0, 2575 0, 2576 /*10*/ 0, 2577 0, 2578 0, 2579 MatSOR_MPIAIJ, 2580 MatTranspose_MPIAIJ, 2581 /*15*/ MatGetInfo_MPIAIJ, 2582 MatEqual_MPIAIJ, 2583 MatGetDiagonal_MPIAIJ, 2584 MatDiagonalScale_MPIAIJ, 2585 MatNorm_MPIAIJ, 2586 /*20*/ MatAssemblyBegin_MPIAIJ, 2587 MatAssemblyEnd_MPIAIJ, 2588 MatSetOption_MPIAIJ, 2589 MatZeroEntries_MPIAIJ, 2590 /*24*/ MatZeroRows_MPIAIJ, 2591 0, 2592 0, 2593 0, 2594 0, 2595 /*29*/ MatSetUp_MPIAIJ, 2596 0, 2597 0, 2598 MatGetDiagonalBlock_MPIAIJ, 2599 0, 2600 /*34*/ MatDuplicate_MPIAIJ, 2601 0, 2602 0, 2603 0, 2604 0, 2605 /*39*/ MatAXPY_MPIAIJ, 2606 MatCreateSubMatrices_MPIAIJ, 2607 MatIncreaseOverlap_MPIAIJ, 2608 MatGetValues_MPIAIJ, 2609 MatCopy_MPIAIJ, 2610 /*44*/ MatGetRowMax_MPIAIJ, 2611 MatScale_MPIAIJ, 2612 MatShift_MPIAIJ, 2613 MatDiagonalSet_MPIAIJ, 2614 MatZeroRowsColumns_MPIAIJ, 2615 /*49*/ MatSetRandom_MPIAIJ, 2616 0, 2617 0, 2618 0, 2619 0, 2620 /*54*/ MatFDColoringCreate_MPIXAIJ, 2621 0, 2622 MatSetUnfactored_MPIAIJ, 2623 MatPermute_MPIAIJ, 2624 0, 2625 /*59*/ MatCreateSubMatrix_MPIAIJ, 2626 MatDestroy_MPIAIJ, 2627 MatView_MPIAIJ, 2628 0, 2629 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2630 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2631 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2632 0, 2633 0, 2634 0, 2635 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2636 MatGetRowMinAbs_MPIAIJ, 2637 0, 2638 0, 2639 0, 2640 0, 2641 /*75*/ MatFDColoringApply_AIJ, 2642 MatSetFromOptions_MPIAIJ, 2643 0, 2644 0, 2645 MatFindZeroDiagonals_MPIAIJ, 2646 /*80*/ 0, 2647 0, 2648 0, 2649 /*83*/ MatLoad_MPIAIJ, 2650 MatIsSymmetric_MPIAIJ, 2651 0, 2652 0, 2653 0, 2654 0, 2655 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2656 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2657 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2658 MatPtAP_MPIAIJ_MPIAIJ, 2659 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2660 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*99*/ 0, 2666 0, 2667 0, 2668 MatConjugate_MPIAIJ, 2669 0, 2670 /*104*/MatSetValuesRow_MPIAIJ, 2671 MatRealPart_MPIAIJ, 2672 MatImaginaryPart_MPIAIJ, 2673 0, 2674 0, 2675 /*109*/0, 2676 0, 2677 MatGetRowMin_MPIAIJ, 2678 0, 2679 MatMissingDiagonal_MPIAIJ, 2680 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2681 0, 2682 MatGetGhosts_MPIAIJ, 2683 0, 2684 0, 2685 /*119*/0, 2686 0, 2687 0, 2688 0, 2689 MatGetMultiProcBlock_MPIAIJ, 2690 /*124*/MatFindNonzeroRows_MPIAIJ, 2691 MatGetColumnNorms_MPIAIJ, 2692 MatInvertBlockDiagonal_MPIAIJ, 2693 MatInvertVariableBlockDiagonal_MPIAIJ, 2694 MatCreateSubMatricesMPI_MPIAIJ, 2695 /*129*/0, 2696 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2697 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2698 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2699 0, 2700 /*134*/0, 2701 0, 2702 MatRARt_MPIAIJ_MPIAIJ, 2703 0, 2704 0, 2705 /*139*/MatSetBlockSizes_MPIAIJ, 2706 0, 2707 0, 2708 MatFDColoringSetUp_MPIXAIJ, 2709 MatFindOffBlockDiagonalEntries_MPIAIJ, 2710 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2711 }; 2712 2713 /* ----------------------------------------------------------------------------------------*/ 2714 2715 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2716 { 2717 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2718 PetscErrorCode ierr; 2719 2720 PetscFunctionBegin; 2721 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2722 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2723 PetscFunctionReturn(0); 2724 } 2725 2726 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2727 { 2728 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2733 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 2737 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2738 { 2739 Mat_MPIAIJ *b; 2740 PetscErrorCode ierr; 2741 2742 PetscFunctionBegin; 2743 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2744 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2745 b = (Mat_MPIAIJ*)B->data; 2746 2747 #if defined(PETSC_USE_CTABLE) 2748 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2749 #else 2750 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2751 #endif 2752 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2753 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2754 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2755 2756 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2757 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2758 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2759 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2760 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2761 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2762 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2763 2764 if (!B->preallocated) { 2765 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2766 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2767 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2768 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2769 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2770 } 2771 2772 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2773 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2774 B->preallocated = PETSC_TRUE; 2775 B->was_assembled = PETSC_FALSE; 2776 B->assembled = PETSC_FALSE; 2777 PetscFunctionReturn(0); 2778 } 2779 2780 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2781 { 2782 Mat_MPIAIJ *b; 2783 PetscErrorCode ierr; 2784 2785 PetscFunctionBegin; 2786 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2787 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2788 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2789 b = (Mat_MPIAIJ*)B->data; 2790 2791 #if defined(PETSC_USE_CTABLE) 2792 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2793 #else 2794 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2795 #endif 2796 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2797 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2798 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2799 2800 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2801 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2802 B->preallocated = PETSC_TRUE; 2803 B->was_assembled = PETSC_FALSE; 2804 B->assembled = PETSC_FALSE; 2805 PetscFunctionReturn(0); 2806 } 2807 2808 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2809 { 2810 Mat mat; 2811 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2812 PetscErrorCode ierr; 2813 2814 PetscFunctionBegin; 2815 *newmat = 0; 2816 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2817 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2818 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2819 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2820 a = (Mat_MPIAIJ*)mat->data; 2821 2822 mat->factortype = matin->factortype; 2823 mat->assembled = PETSC_TRUE; 2824 mat->insertmode = NOT_SET_VALUES; 2825 mat->preallocated = PETSC_TRUE; 2826 2827 a->size = oldmat->size; 2828 a->rank = oldmat->rank; 2829 a->donotstash = oldmat->donotstash; 2830 a->roworiented = oldmat->roworiented; 2831 a->rowindices = 0; 2832 a->rowvalues = 0; 2833 a->getrowactive = PETSC_FALSE; 2834 2835 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2836 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2837 2838 if (oldmat->colmap) { 2839 #if defined(PETSC_USE_CTABLE) 2840 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2841 #else 2842 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2843 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2844 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2845 #endif 2846 } else a->colmap = 0; 2847 if (oldmat->garray) { 2848 PetscInt len; 2849 len = oldmat->B->cmap->n; 2850 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2851 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2852 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2853 } else a->garray = 0; 2854 2855 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2856 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2857 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2858 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2859 2860 if (oldmat->Mvctx_mpi1) { 2861 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2862 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2863 } 2864 2865 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2866 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2867 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2868 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2869 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2870 *newmat = mat; 2871 PetscFunctionReturn(0); 2872 } 2873 2874 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2875 { 2876 PetscBool isbinary, ishdf5; 2877 PetscErrorCode ierr; 2878 2879 PetscFunctionBegin; 2880 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2881 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2882 /* force binary viewer to load .info file if it has not yet done so */ 2883 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2884 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2885 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2886 if (isbinary) { 2887 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2888 } else if (ishdf5) { 2889 #if defined(PETSC_HAVE_HDF5) 2890 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2891 #else 2892 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2893 #endif 2894 } else { 2895 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2896 } 2897 PetscFunctionReturn(0); 2898 } 2899 2900 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2901 { 2902 PetscScalar *vals,*svals; 2903 MPI_Comm comm; 2904 PetscErrorCode ierr; 2905 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2906 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2907 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2908 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2909 PetscInt cend,cstart,n,*rowners; 2910 int fd; 2911 PetscInt bs = newMat->rmap->bs; 2912 2913 PetscFunctionBegin; 2914 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2915 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2916 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2917 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2918 if (!rank) { 2919 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2920 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2921 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2922 } 2923 2924 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2925 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2926 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2927 if (bs < 0) bs = 1; 2928 2929 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2930 M = header[1]; N = header[2]; 2931 2932 /* If global sizes are set, check if they are consistent with that given in the file */ 2933 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2934 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2935 2936 /* determine ownership of all (block) rows */ 2937 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2938 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2939 else m = newMat->rmap->n; /* Set by user */ 2940 2941 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2942 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2943 2944 /* First process needs enough room for process with most rows */ 2945 if (!rank) { 2946 mmax = rowners[1]; 2947 for (i=2; i<=size; i++) { 2948 mmax = PetscMax(mmax, rowners[i]); 2949 } 2950 } else mmax = -1; /* unused, but compilers complain */ 2951 2952 rowners[0] = 0; 2953 for (i=2; i<=size; i++) { 2954 rowners[i] += rowners[i-1]; 2955 } 2956 rstart = rowners[rank]; 2957 rend = rowners[rank+1]; 2958 2959 /* distribute row lengths to all processors */ 2960 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2961 if (!rank) { 2962 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2963 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2964 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2965 for (j=0; j<m; j++) { 2966 procsnz[0] += ourlens[j]; 2967 } 2968 for (i=1; i<size; i++) { 2969 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2970 /* calculate the number of nonzeros on each processor */ 2971 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2972 procsnz[i] += rowlengths[j]; 2973 } 2974 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2975 } 2976 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2977 } else { 2978 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2979 } 2980 2981 if (!rank) { 2982 /* determine max buffer needed and allocate it */ 2983 maxnz = 0; 2984 for (i=0; i<size; i++) { 2985 maxnz = PetscMax(maxnz,procsnz[i]); 2986 } 2987 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2988 2989 /* read in my part of the matrix column indices */ 2990 nz = procsnz[0]; 2991 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2992 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2993 2994 /* read in every one elses and ship off */ 2995 for (i=1; i<size; i++) { 2996 nz = procsnz[i]; 2997 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2998 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2999 } 3000 ierr = PetscFree(cols);CHKERRQ(ierr); 3001 } else { 3002 /* determine buffer space needed for message */ 3003 nz = 0; 3004 for (i=0; i<m; i++) { 3005 nz += ourlens[i]; 3006 } 3007 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3008 3009 /* receive message of column indices*/ 3010 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3011 } 3012 3013 /* determine column ownership if matrix is not square */ 3014 if (N != M) { 3015 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3016 else n = newMat->cmap->n; 3017 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3018 cstart = cend - n; 3019 } else { 3020 cstart = rstart; 3021 cend = rend; 3022 n = cend - cstart; 3023 } 3024 3025 /* loop over local rows, determining number of off diagonal entries */ 3026 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3027 jj = 0; 3028 for (i=0; i<m; i++) { 3029 for (j=0; j<ourlens[i]; j++) { 3030 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3031 jj++; 3032 } 3033 } 3034 3035 for (i=0; i<m; i++) { 3036 ourlens[i] -= offlens[i]; 3037 } 3038 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3039 3040 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3041 3042 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3043 3044 for (i=0; i<m; i++) { 3045 ourlens[i] += offlens[i]; 3046 } 3047 3048 if (!rank) { 3049 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3050 3051 /* read in my part of the matrix numerical values */ 3052 nz = procsnz[0]; 3053 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3054 3055 /* insert into matrix */ 3056 jj = rstart; 3057 smycols = mycols; 3058 svals = vals; 3059 for (i=0; i<m; i++) { 3060 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3061 smycols += ourlens[i]; 3062 svals += ourlens[i]; 3063 jj++; 3064 } 3065 3066 /* read in other processors and ship out */ 3067 for (i=1; i<size; i++) { 3068 nz = procsnz[i]; 3069 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3070 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3071 } 3072 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3073 } else { 3074 /* receive numeric values */ 3075 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3076 3077 /* receive message of values*/ 3078 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3079 3080 /* insert into matrix */ 3081 jj = rstart; 3082 smycols = mycols; 3083 svals = vals; 3084 for (i=0; i<m; i++) { 3085 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3086 smycols += ourlens[i]; 3087 svals += ourlens[i]; 3088 jj++; 3089 } 3090 } 3091 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3092 ierr = PetscFree(vals);CHKERRQ(ierr); 3093 ierr = PetscFree(mycols);CHKERRQ(ierr); 3094 ierr = PetscFree(rowners);CHKERRQ(ierr); 3095 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3096 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3097 PetscFunctionReturn(0); 3098 } 3099 3100 /* Not scalable because of ISAllGather() unless getting all columns. */ 3101 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3102 { 3103 PetscErrorCode ierr; 3104 IS iscol_local; 3105 PetscBool isstride; 3106 PetscMPIInt lisstride=0,gisstride; 3107 3108 PetscFunctionBegin; 3109 /* check if we are grabbing all columns*/ 3110 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3111 3112 if (isstride) { 3113 PetscInt start,len,mstart,mlen; 3114 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3115 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3116 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3117 if (mstart == start && mlen-mstart == len) lisstride = 1; 3118 } 3119 3120 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3121 if (gisstride) { 3122 PetscInt N; 3123 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3124 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3125 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3126 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3127 } else { 3128 PetscInt cbs; 3129 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3130 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3131 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3132 } 3133 3134 *isseq = iscol_local; 3135 PetscFunctionReturn(0); 3136 } 3137 3138 /* 3139 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3140 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3141 3142 Input Parameters: 3143 mat - matrix 3144 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3145 i.e., mat->rstart <= isrow[i] < mat->rend 3146 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3147 i.e., mat->cstart <= iscol[i] < mat->cend 3148 Output Parameter: 3149 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3150 iscol_o - sequential column index set for retrieving mat->B 3151 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3152 */ 3153 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3154 { 3155 PetscErrorCode ierr; 3156 Vec x,cmap; 3157 const PetscInt *is_idx; 3158 PetscScalar *xarray,*cmaparray; 3159 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3160 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3161 Mat B=a->B; 3162 Vec lvec=a->lvec,lcmap; 3163 PetscInt i,cstart,cend,Bn=B->cmap->N; 3164 MPI_Comm comm; 3165 VecScatter Mvctx=a->Mvctx; 3166 3167 PetscFunctionBegin; 3168 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3169 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3170 3171 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3172 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3173 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3174 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3175 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3176 3177 /* Get start indices */ 3178 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3179 isstart -= ncols; 3180 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3181 3182 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3183 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3184 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3185 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3186 for (i=0; i<ncols; i++) { 3187 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3188 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3189 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3190 } 3191 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3192 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3193 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3194 3195 /* Get iscol_d */ 3196 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3197 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3198 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3199 3200 /* Get isrow_d */ 3201 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3202 rstart = mat->rmap->rstart; 3203 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3204 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3205 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3206 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3207 3208 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3209 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3210 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3211 3212 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3213 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3214 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3215 3216 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3217 3218 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3219 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3220 3221 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3222 /* off-process column indices */ 3223 count = 0; 3224 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3225 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3226 3227 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3228 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3229 for (i=0; i<Bn; i++) { 3230 if (PetscRealPart(xarray[i]) > -1.0) { 3231 idx[count] = i; /* local column index in off-diagonal part B */ 3232 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3233 count++; 3234 } 3235 } 3236 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3237 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3238 3239 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3240 /* cannot ensure iscol_o has same blocksize as iscol! */ 3241 3242 ierr = PetscFree(idx);CHKERRQ(ierr); 3243 *garray = cmap1; 3244 3245 ierr = VecDestroy(&x);CHKERRQ(ierr); 3246 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3247 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3248 PetscFunctionReturn(0); 3249 } 3250 3251 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3252 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3253 { 3254 PetscErrorCode ierr; 3255 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3256 Mat M = NULL; 3257 MPI_Comm comm; 3258 IS iscol_d,isrow_d,iscol_o; 3259 Mat Asub = NULL,Bsub = NULL; 3260 PetscInt n; 3261 3262 PetscFunctionBegin; 3263 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3264 3265 if (call == MAT_REUSE_MATRIX) { 3266 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3267 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3268 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3269 3270 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3271 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3272 3273 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3274 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3275 3276 /* Update diagonal and off-diagonal portions of submat */ 3277 asub = (Mat_MPIAIJ*)(*submat)->data; 3278 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3279 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3280 if (n) { 3281 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3282 } 3283 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3284 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3285 3286 } else { /* call == MAT_INITIAL_MATRIX) */ 3287 const PetscInt *garray; 3288 PetscInt BsubN; 3289 3290 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3291 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3292 3293 /* Create local submatrices Asub and Bsub */ 3294 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3295 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3296 3297 /* Create submatrix M */ 3298 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3299 3300 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3301 asub = (Mat_MPIAIJ*)M->data; 3302 3303 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3304 n = asub->B->cmap->N; 3305 if (BsubN > n) { 3306 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3307 const PetscInt *idx; 3308 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3309 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3310 3311 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3312 j = 0; 3313 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3314 for (i=0; i<n; i++) { 3315 if (j >= BsubN) break; 3316 while (subgarray[i] > garray[j]) j++; 3317 3318 if (subgarray[i] == garray[j]) { 3319 idx_new[i] = idx[j++]; 3320 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3321 } 3322 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3323 3324 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3325 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3326 3327 } else if (BsubN < n) { 3328 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3329 } 3330 3331 ierr = PetscFree(garray);CHKERRQ(ierr); 3332 *submat = M; 3333 3334 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3335 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3336 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3337 3338 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3339 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3340 3341 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3342 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3343 } 3344 PetscFunctionReturn(0); 3345 } 3346 3347 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3348 { 3349 PetscErrorCode ierr; 3350 IS iscol_local=NULL,isrow_d; 3351 PetscInt csize; 3352 PetscInt n,i,j,start,end; 3353 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3354 MPI_Comm comm; 3355 3356 PetscFunctionBegin; 3357 /* If isrow has same processor distribution as mat, 3358 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3359 if (call == MAT_REUSE_MATRIX) { 3360 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3361 if (isrow_d) { 3362 sameRowDist = PETSC_TRUE; 3363 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3364 } else { 3365 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3366 if (iscol_local) { 3367 sameRowDist = PETSC_TRUE; 3368 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3369 } 3370 } 3371 } else { 3372 /* Check if isrow has same processor distribution as mat */ 3373 sameDist[0] = PETSC_FALSE; 3374 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3375 if (!n) { 3376 sameDist[0] = PETSC_TRUE; 3377 } else { 3378 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3379 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3380 if (i >= start && j < end) { 3381 sameDist[0] = PETSC_TRUE; 3382 } 3383 } 3384 3385 /* Check if iscol has same processor distribution as mat */ 3386 sameDist[1] = PETSC_FALSE; 3387 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3388 if (!n) { 3389 sameDist[1] = PETSC_TRUE; 3390 } else { 3391 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3392 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3393 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3394 } 3395 3396 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3397 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3398 sameRowDist = tsameDist[0]; 3399 } 3400 3401 if (sameRowDist) { 3402 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3403 /* isrow and iscol have same processor distribution as mat */ 3404 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3405 PetscFunctionReturn(0); 3406 } else { /* sameRowDist */ 3407 /* isrow has same processor distribution as mat */ 3408 if (call == MAT_INITIAL_MATRIX) { 3409 PetscBool sorted; 3410 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3411 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3412 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3413 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3414 3415 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3416 if (sorted) { 3417 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3418 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3419 PetscFunctionReturn(0); 3420 } 3421 } else { /* call == MAT_REUSE_MATRIX */ 3422 IS iscol_sub; 3423 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3424 if (iscol_sub) { 3425 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3426 PetscFunctionReturn(0); 3427 } 3428 } 3429 } 3430 } 3431 3432 /* General case: iscol -> iscol_local which has global size of iscol */ 3433 if (call == MAT_REUSE_MATRIX) { 3434 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3435 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3436 } else { 3437 if (!iscol_local) { 3438 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3439 } 3440 } 3441 3442 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3443 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3444 3445 if (call == MAT_INITIAL_MATRIX) { 3446 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3447 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3448 } 3449 PetscFunctionReturn(0); 3450 } 3451 3452 /*@C 3453 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3454 and "off-diagonal" part of the matrix in CSR format. 3455 3456 Collective on MPI_Comm 3457 3458 Input Parameters: 3459 + comm - MPI communicator 3460 . A - "diagonal" portion of matrix 3461 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3462 - garray - global index of B columns 3463 3464 Output Parameter: 3465 . mat - the matrix, with input A as its local diagonal matrix 3466 Level: advanced 3467 3468 Notes: 3469 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3470 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3471 3472 .seealso: MatCreateMPIAIJWithSplitArrays() 3473 @*/ 3474 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3475 { 3476 PetscErrorCode ierr; 3477 Mat_MPIAIJ *maij; 3478 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3479 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3480 PetscScalar *oa=b->a; 3481 Mat Bnew; 3482 PetscInt m,n,N; 3483 3484 PetscFunctionBegin; 3485 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3486 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3487 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3488 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3489 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3490 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3491 3492 /* Get global columns of mat */ 3493 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3494 3495 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3496 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3497 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3498 maij = (Mat_MPIAIJ*)(*mat)->data; 3499 3500 (*mat)->preallocated = PETSC_TRUE; 3501 3502 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3503 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3504 3505 /* Set A as diagonal portion of *mat */ 3506 maij->A = A; 3507 3508 nz = oi[m]; 3509 for (i=0; i<nz; i++) { 3510 col = oj[i]; 3511 oj[i] = garray[col]; 3512 } 3513 3514 /* Set Bnew as off-diagonal portion of *mat */ 3515 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3516 bnew = (Mat_SeqAIJ*)Bnew->data; 3517 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3518 maij->B = Bnew; 3519 3520 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3521 3522 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3523 b->free_a = PETSC_FALSE; 3524 b->free_ij = PETSC_FALSE; 3525 ierr = MatDestroy(&B);CHKERRQ(ierr); 3526 3527 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3528 bnew->free_a = PETSC_TRUE; 3529 bnew->free_ij = PETSC_TRUE; 3530 3531 /* condense columns of maij->B */ 3532 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3533 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3534 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3535 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3536 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3537 PetscFunctionReturn(0); 3538 } 3539 3540 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3541 3542 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3543 { 3544 PetscErrorCode ierr; 3545 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3546 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3547 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3548 Mat M,Msub,B=a->B; 3549 MatScalar *aa; 3550 Mat_SeqAIJ *aij; 3551 PetscInt *garray = a->garray,*colsub,Ncols; 3552 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3553 IS iscol_sub,iscmap; 3554 const PetscInt *is_idx,*cmap; 3555 PetscBool allcolumns=PETSC_FALSE; 3556 MPI_Comm comm; 3557 3558 PetscFunctionBegin; 3559 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3560 3561 if (call == MAT_REUSE_MATRIX) { 3562 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3563 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3564 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3565 3566 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3567 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3568 3569 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3570 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3571 3572 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3573 3574 } else { /* call == MAT_INITIAL_MATRIX) */ 3575 PetscBool flg; 3576 3577 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3578 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3579 3580 /* (1) iscol -> nonscalable iscol_local */ 3581 /* Check for special case: each processor gets entire matrix columns */ 3582 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3583 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3584 if (allcolumns) { 3585 iscol_sub = iscol_local; 3586 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3587 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3588 3589 } else { 3590 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3591 PetscInt *idx,*cmap1,k; 3592 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3593 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3594 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3595 count = 0; 3596 k = 0; 3597 for (i=0; i<Ncols; i++) { 3598 j = is_idx[i]; 3599 if (j >= cstart && j < cend) { 3600 /* diagonal part of mat */ 3601 idx[count] = j; 3602 cmap1[count++] = i; /* column index in submat */ 3603 } else if (Bn) { 3604 /* off-diagonal part of mat */ 3605 if (j == garray[k]) { 3606 idx[count] = j; 3607 cmap1[count++] = i; /* column index in submat */ 3608 } else if (j > garray[k]) { 3609 while (j > garray[k] && k < Bn-1) k++; 3610 if (j == garray[k]) { 3611 idx[count] = j; 3612 cmap1[count++] = i; /* column index in submat */ 3613 } 3614 } 3615 } 3616 } 3617 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3618 3619 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3620 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3621 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3622 3623 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3624 } 3625 3626 /* (3) Create sequential Msub */ 3627 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3628 } 3629 3630 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3631 aij = (Mat_SeqAIJ*)(Msub)->data; 3632 ii = aij->i; 3633 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3634 3635 /* 3636 m - number of local rows 3637 Ncols - number of columns (same on all processors) 3638 rstart - first row in new global matrix generated 3639 */ 3640 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3641 3642 if (call == MAT_INITIAL_MATRIX) { 3643 /* (4) Create parallel newmat */ 3644 PetscMPIInt rank,size; 3645 PetscInt csize; 3646 3647 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3648 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3649 3650 /* 3651 Determine the number of non-zeros in the diagonal and off-diagonal 3652 portions of the matrix in order to do correct preallocation 3653 */ 3654 3655 /* first get start and end of "diagonal" columns */ 3656 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3657 if (csize == PETSC_DECIDE) { 3658 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3659 if (mglobal == Ncols) { /* square matrix */ 3660 nlocal = m; 3661 } else { 3662 nlocal = Ncols/size + ((Ncols % size) > rank); 3663 } 3664 } else { 3665 nlocal = csize; 3666 } 3667 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3668 rstart = rend - nlocal; 3669 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3670 3671 /* next, compute all the lengths */ 3672 jj = aij->j; 3673 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3674 olens = dlens + m; 3675 for (i=0; i<m; i++) { 3676 jend = ii[i+1] - ii[i]; 3677 olen = 0; 3678 dlen = 0; 3679 for (j=0; j<jend; j++) { 3680 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3681 else dlen++; 3682 jj++; 3683 } 3684 olens[i] = olen; 3685 dlens[i] = dlen; 3686 } 3687 3688 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3689 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3690 3691 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3692 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3693 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3694 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3695 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3696 ierr = PetscFree(dlens);CHKERRQ(ierr); 3697 3698 } else { /* call == MAT_REUSE_MATRIX */ 3699 M = *newmat; 3700 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3701 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3702 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3703 /* 3704 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3705 rather than the slower MatSetValues(). 3706 */ 3707 M->was_assembled = PETSC_TRUE; 3708 M->assembled = PETSC_FALSE; 3709 } 3710 3711 /* (5) Set values of Msub to *newmat */ 3712 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3713 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3714 3715 jj = aij->j; 3716 aa = aij->a; 3717 for (i=0; i<m; i++) { 3718 row = rstart + i; 3719 nz = ii[i+1] - ii[i]; 3720 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3721 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3722 jj += nz; aa += nz; 3723 } 3724 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3725 3726 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3727 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3728 3729 ierr = PetscFree(colsub);CHKERRQ(ierr); 3730 3731 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3732 if (call == MAT_INITIAL_MATRIX) { 3733 *newmat = M; 3734 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3735 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3736 3737 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3738 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3739 3740 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3741 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3742 3743 if (iscol_local) { 3744 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3745 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3746 } 3747 } 3748 PetscFunctionReturn(0); 3749 } 3750 3751 /* 3752 Not great since it makes two copies of the submatrix, first an SeqAIJ 3753 in local and then by concatenating the local matrices the end result. 3754 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3755 3756 Note: This requires a sequential iscol with all indices. 3757 */ 3758 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3759 { 3760 PetscErrorCode ierr; 3761 PetscMPIInt rank,size; 3762 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3763 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3764 Mat M,Mreuse; 3765 MatScalar *aa,*vwork; 3766 MPI_Comm comm; 3767 Mat_SeqAIJ *aij; 3768 PetscBool colflag,allcolumns=PETSC_FALSE; 3769 3770 PetscFunctionBegin; 3771 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3772 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3773 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3774 3775 /* Check for special case: each processor gets entire matrix columns */ 3776 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3777 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3778 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3779 3780 if (call == MAT_REUSE_MATRIX) { 3781 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3782 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3783 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3784 } else { 3785 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3786 } 3787 3788 /* 3789 m - number of local rows 3790 n - number of columns (same on all processors) 3791 rstart - first row in new global matrix generated 3792 */ 3793 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3794 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3795 if (call == MAT_INITIAL_MATRIX) { 3796 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3797 ii = aij->i; 3798 jj = aij->j; 3799 3800 /* 3801 Determine the number of non-zeros in the diagonal and off-diagonal 3802 portions of the matrix in order to do correct preallocation 3803 */ 3804 3805 /* first get start and end of "diagonal" columns */ 3806 if (csize == PETSC_DECIDE) { 3807 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3808 if (mglobal == n) { /* square matrix */ 3809 nlocal = m; 3810 } else { 3811 nlocal = n/size + ((n % size) > rank); 3812 } 3813 } else { 3814 nlocal = csize; 3815 } 3816 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3817 rstart = rend - nlocal; 3818 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3819 3820 /* next, compute all the lengths */ 3821 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3822 olens = dlens + m; 3823 for (i=0; i<m; i++) { 3824 jend = ii[i+1] - ii[i]; 3825 olen = 0; 3826 dlen = 0; 3827 for (j=0; j<jend; j++) { 3828 if (*jj < rstart || *jj >= rend) olen++; 3829 else dlen++; 3830 jj++; 3831 } 3832 olens[i] = olen; 3833 dlens[i] = dlen; 3834 } 3835 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3836 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3837 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3838 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3839 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3840 ierr = PetscFree(dlens);CHKERRQ(ierr); 3841 } else { 3842 PetscInt ml,nl; 3843 3844 M = *newmat; 3845 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3846 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3847 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3848 /* 3849 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3850 rather than the slower MatSetValues(). 3851 */ 3852 M->was_assembled = PETSC_TRUE; 3853 M->assembled = PETSC_FALSE; 3854 } 3855 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3856 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3857 ii = aij->i; 3858 jj = aij->j; 3859 aa = aij->a; 3860 for (i=0; i<m; i++) { 3861 row = rstart + i; 3862 nz = ii[i+1] - ii[i]; 3863 cwork = jj; jj += nz; 3864 vwork = aa; aa += nz; 3865 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3866 } 3867 3868 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3869 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3870 *newmat = M; 3871 3872 /* save submatrix used in processor for next request */ 3873 if (call == MAT_INITIAL_MATRIX) { 3874 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3875 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3876 } 3877 PetscFunctionReturn(0); 3878 } 3879 3880 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3881 { 3882 PetscInt m,cstart, cend,j,nnz,i,d; 3883 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3884 const PetscInt *JJ; 3885 PetscScalar *values; 3886 PetscErrorCode ierr; 3887 PetscBool nooffprocentries; 3888 3889 PetscFunctionBegin; 3890 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3891 3892 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3893 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3894 m = B->rmap->n; 3895 cstart = B->cmap->rstart; 3896 cend = B->cmap->rend; 3897 rstart = B->rmap->rstart; 3898 3899 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3900 3901 #if defined(PETSC_USE_DEBUG) 3902 for (i=0; i<m && Ii; i++) { 3903 nnz = Ii[i+1]- Ii[i]; 3904 JJ = J + Ii[i]; 3905 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3906 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3907 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3908 } 3909 #endif 3910 3911 for (i=0; i<m && Ii; i++) { 3912 nnz = Ii[i+1]- Ii[i]; 3913 JJ = J + Ii[i]; 3914 nnz_max = PetscMax(nnz_max,nnz); 3915 d = 0; 3916 for (j=0; j<nnz; j++) { 3917 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3918 } 3919 d_nnz[i] = d; 3920 o_nnz[i] = nnz - d; 3921 } 3922 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3923 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3924 3925 if (v) values = (PetscScalar*)v; 3926 else { 3927 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3928 } 3929 3930 for (i=0; i<m && Ii; i++) { 3931 ii = i + rstart; 3932 nnz = Ii[i+1]- Ii[i]; 3933 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3934 } 3935 nooffprocentries = B->nooffprocentries; 3936 B->nooffprocentries = PETSC_TRUE; 3937 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3938 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3939 B->nooffprocentries = nooffprocentries; 3940 3941 if (!v) { 3942 ierr = PetscFree(values);CHKERRQ(ierr); 3943 } 3944 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3945 PetscFunctionReturn(0); 3946 } 3947 3948 /*@ 3949 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3950 (the default parallel PETSc format). 3951 3952 Collective on MPI_Comm 3953 3954 Input Parameters: 3955 + B - the matrix 3956 . i - the indices into j for the start of each local row (starts with zero) 3957 . j - the column indices for each local row (starts with zero) 3958 - v - optional values in the matrix 3959 3960 Level: developer 3961 3962 Notes: 3963 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3964 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3965 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3966 3967 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3968 3969 The format which is used for the sparse matrix input, is equivalent to a 3970 row-major ordering.. i.e for the following matrix, the input data expected is 3971 as shown 3972 3973 $ 1 0 0 3974 $ 2 0 3 P0 3975 $ ------- 3976 $ 4 5 6 P1 3977 $ 3978 $ Process0 [P0]: rows_owned=[0,1] 3979 $ i = {0,1,3} [size = nrow+1 = 2+1] 3980 $ j = {0,0,2} [size = 3] 3981 $ v = {1,2,3} [size = 3] 3982 $ 3983 $ Process1 [P1]: rows_owned=[2] 3984 $ i = {0,3} [size = nrow+1 = 1+1] 3985 $ j = {0,1,2} [size = 3] 3986 $ v = {4,5,6} [size = 3] 3987 3988 .keywords: matrix, aij, compressed row, sparse, parallel 3989 3990 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3991 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3992 @*/ 3993 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3994 { 3995 PetscErrorCode ierr; 3996 3997 PetscFunctionBegin; 3998 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3999 PetscFunctionReturn(0); 4000 } 4001 4002 /*@C 4003 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4004 (the default parallel PETSc format). For good matrix assembly performance 4005 the user should preallocate the matrix storage by setting the parameters 4006 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4007 performance can be increased by more than a factor of 50. 4008 4009 Collective on MPI_Comm 4010 4011 Input Parameters: 4012 + B - the matrix 4013 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4014 (same value is used for all local rows) 4015 . d_nnz - array containing the number of nonzeros in the various rows of the 4016 DIAGONAL portion of the local submatrix (possibly different for each row) 4017 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4018 The size of this array is equal to the number of local rows, i.e 'm'. 4019 For matrices that will be factored, you must leave room for (and set) 4020 the diagonal entry even if it is zero. 4021 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4022 submatrix (same value is used for all local rows). 4023 - o_nnz - array containing the number of nonzeros in the various rows of the 4024 OFF-DIAGONAL portion of the local submatrix (possibly different for 4025 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4026 structure. The size of this array is equal to the number 4027 of local rows, i.e 'm'. 4028 4029 If the *_nnz parameter is given then the *_nz parameter is ignored 4030 4031 The AIJ format (also called the Yale sparse matrix format or 4032 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4033 storage. The stored row and column indices begin with zero. 4034 See Users-Manual: ch_mat for details. 4035 4036 The parallel matrix is partitioned such that the first m0 rows belong to 4037 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4038 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4039 4040 The DIAGONAL portion of the local submatrix of a processor can be defined 4041 as the submatrix which is obtained by extraction the part corresponding to 4042 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4043 first row that belongs to the processor, r2 is the last row belonging to 4044 the this processor, and c1-c2 is range of indices of the local part of a 4045 vector suitable for applying the matrix to. This is an mxn matrix. In the 4046 common case of a square matrix, the row and column ranges are the same and 4047 the DIAGONAL part is also square. The remaining portion of the local 4048 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4049 4050 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4051 4052 You can call MatGetInfo() to get information on how effective the preallocation was; 4053 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4054 You can also run with the option -info and look for messages with the string 4055 malloc in them to see if additional memory allocation was needed. 4056 4057 Example usage: 4058 4059 Consider the following 8x8 matrix with 34 non-zero values, that is 4060 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4061 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4062 as follows: 4063 4064 .vb 4065 1 2 0 | 0 3 0 | 0 4 4066 Proc0 0 5 6 | 7 0 0 | 8 0 4067 9 0 10 | 11 0 0 | 12 0 4068 ------------------------------------- 4069 13 0 14 | 15 16 17 | 0 0 4070 Proc1 0 18 0 | 19 20 21 | 0 0 4071 0 0 0 | 22 23 0 | 24 0 4072 ------------------------------------- 4073 Proc2 25 26 27 | 0 0 28 | 29 0 4074 30 0 0 | 31 32 33 | 0 34 4075 .ve 4076 4077 This can be represented as a collection of submatrices as: 4078 4079 .vb 4080 A B C 4081 D E F 4082 G H I 4083 .ve 4084 4085 Where the submatrices A,B,C are owned by proc0, D,E,F are 4086 owned by proc1, G,H,I are owned by proc2. 4087 4088 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4089 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4090 The 'M','N' parameters are 8,8, and have the same values on all procs. 4091 4092 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4093 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4094 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4095 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4096 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4097 matrix, ans [DF] as another SeqAIJ matrix. 4098 4099 When d_nz, o_nz parameters are specified, d_nz storage elements are 4100 allocated for every row of the local diagonal submatrix, and o_nz 4101 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4102 One way to choose d_nz and o_nz is to use the max nonzerors per local 4103 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4104 In this case, the values of d_nz,o_nz are: 4105 .vb 4106 proc0 : dnz = 2, o_nz = 2 4107 proc1 : dnz = 3, o_nz = 2 4108 proc2 : dnz = 1, o_nz = 4 4109 .ve 4110 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4111 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4112 for proc3. i.e we are using 12+15+10=37 storage locations to store 4113 34 values. 4114 4115 When d_nnz, o_nnz parameters are specified, the storage is specified 4116 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4117 In the above case the values for d_nnz,o_nnz are: 4118 .vb 4119 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4120 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4121 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4122 .ve 4123 Here the space allocated is sum of all the above values i.e 34, and 4124 hence pre-allocation is perfect. 4125 4126 Level: intermediate 4127 4128 .keywords: matrix, aij, compressed row, sparse, parallel 4129 4130 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4131 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4132 @*/ 4133 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4134 { 4135 PetscErrorCode ierr; 4136 4137 PetscFunctionBegin; 4138 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4139 PetscValidType(B,1); 4140 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4141 PetscFunctionReturn(0); 4142 } 4143 4144 /*@ 4145 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4146 CSR format the local rows. 4147 4148 Collective on MPI_Comm 4149 4150 Input Parameters: 4151 + comm - MPI communicator 4152 . m - number of local rows (Cannot be PETSC_DECIDE) 4153 . n - This value should be the same as the local size used in creating the 4154 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4155 calculated if N is given) For square matrices n is almost always m. 4156 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4157 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4158 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4159 . j - column indices 4160 - a - matrix values 4161 4162 Output Parameter: 4163 . mat - the matrix 4164 4165 Level: intermediate 4166 4167 Notes: 4168 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4169 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4170 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4171 4172 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4173 4174 The format which is used for the sparse matrix input, is equivalent to a 4175 row-major ordering.. i.e for the following matrix, the input data expected is 4176 as shown 4177 4178 $ 1 0 0 4179 $ 2 0 3 P0 4180 $ ------- 4181 $ 4 5 6 P1 4182 $ 4183 $ Process0 [P0]: rows_owned=[0,1] 4184 $ i = {0,1,3} [size = nrow+1 = 2+1] 4185 $ j = {0,0,2} [size = 3] 4186 $ v = {1,2,3} [size = 3] 4187 $ 4188 $ Process1 [P1]: rows_owned=[2] 4189 $ i = {0,3} [size = nrow+1 = 1+1] 4190 $ j = {0,1,2} [size = 3] 4191 $ v = {4,5,6} [size = 3] 4192 4193 .keywords: matrix, aij, compressed row, sparse, parallel 4194 4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4196 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4197 @*/ 4198 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4199 { 4200 PetscErrorCode ierr; 4201 4202 PetscFunctionBegin; 4203 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4204 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4205 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4206 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4207 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4208 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4209 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4210 PetscFunctionReturn(0); 4211 } 4212 4213 /*@C 4214 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4215 (the default parallel PETSc format). For good matrix assembly performance 4216 the user should preallocate the matrix storage by setting the parameters 4217 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4218 performance can be increased by more than a factor of 50. 4219 4220 Collective on MPI_Comm 4221 4222 Input Parameters: 4223 + comm - MPI communicator 4224 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4225 This value should be the same as the local size used in creating the 4226 y vector for the matrix-vector product y = Ax. 4227 . n - This value should be the same as the local size used in creating the 4228 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4229 calculated if N is given) For square matrices n is almost always m. 4230 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4231 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4232 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4233 (same value is used for all local rows) 4234 . d_nnz - array containing the number of nonzeros in the various rows of the 4235 DIAGONAL portion of the local submatrix (possibly different for each row) 4236 or NULL, if d_nz is used to specify the nonzero structure. 4237 The size of this array is equal to the number of local rows, i.e 'm'. 4238 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4239 submatrix (same value is used for all local rows). 4240 - o_nnz - array containing the number of nonzeros in the various rows of the 4241 OFF-DIAGONAL portion of the local submatrix (possibly different for 4242 each row) or NULL, if o_nz is used to specify the nonzero 4243 structure. The size of this array is equal to the number 4244 of local rows, i.e 'm'. 4245 4246 Output Parameter: 4247 . A - the matrix 4248 4249 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4250 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4251 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4252 4253 Notes: 4254 If the *_nnz parameter is given then the *_nz parameter is ignored 4255 4256 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4257 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4258 storage requirements for this matrix. 4259 4260 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4261 processor than it must be used on all processors that share the object for 4262 that argument. 4263 4264 The user MUST specify either the local or global matrix dimensions 4265 (possibly both). 4266 4267 The parallel matrix is partitioned across processors such that the 4268 first m0 rows belong to process 0, the next m1 rows belong to 4269 process 1, the next m2 rows belong to process 2 etc.. where 4270 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4271 values corresponding to [m x N] submatrix. 4272 4273 The columns are logically partitioned with the n0 columns belonging 4274 to 0th partition, the next n1 columns belonging to the next 4275 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4276 4277 The DIAGONAL portion of the local submatrix on any given processor 4278 is the submatrix corresponding to the rows and columns m,n 4279 corresponding to the given processor. i.e diagonal matrix on 4280 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4281 etc. The remaining portion of the local submatrix [m x (N-n)] 4282 constitute the OFF-DIAGONAL portion. The example below better 4283 illustrates this concept. 4284 4285 For a square global matrix we define each processor's diagonal portion 4286 to be its local rows and the corresponding columns (a square submatrix); 4287 each processor's off-diagonal portion encompasses the remainder of the 4288 local matrix (a rectangular submatrix). 4289 4290 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4291 4292 When calling this routine with a single process communicator, a matrix of 4293 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4294 type of communicator, use the construction mechanism 4295 .vb 4296 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4297 .ve 4298 4299 $ MatCreate(...,&A); 4300 $ MatSetType(A,MATMPIAIJ); 4301 $ MatSetSizes(A, m,n,M,N); 4302 $ MatMPIAIJSetPreallocation(A,...); 4303 4304 By default, this format uses inodes (identical nodes) when possible. 4305 We search for consecutive rows with the same nonzero structure, thereby 4306 reusing matrix information to achieve increased efficiency. 4307 4308 Options Database Keys: 4309 + -mat_no_inode - Do not use inodes 4310 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4311 4312 4313 4314 Example usage: 4315 4316 Consider the following 8x8 matrix with 34 non-zero values, that is 4317 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4318 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4319 as follows 4320 4321 .vb 4322 1 2 0 | 0 3 0 | 0 4 4323 Proc0 0 5 6 | 7 0 0 | 8 0 4324 9 0 10 | 11 0 0 | 12 0 4325 ------------------------------------- 4326 13 0 14 | 15 16 17 | 0 0 4327 Proc1 0 18 0 | 19 20 21 | 0 0 4328 0 0 0 | 22 23 0 | 24 0 4329 ------------------------------------- 4330 Proc2 25 26 27 | 0 0 28 | 29 0 4331 30 0 0 | 31 32 33 | 0 34 4332 .ve 4333 4334 This can be represented as a collection of submatrices as 4335 4336 .vb 4337 A B C 4338 D E F 4339 G H I 4340 .ve 4341 4342 Where the submatrices A,B,C are owned by proc0, D,E,F are 4343 owned by proc1, G,H,I are owned by proc2. 4344 4345 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4346 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4347 The 'M','N' parameters are 8,8, and have the same values on all procs. 4348 4349 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4350 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4351 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4352 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4353 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4354 matrix, ans [DF] as another SeqAIJ matrix. 4355 4356 When d_nz, o_nz parameters are specified, d_nz storage elements are 4357 allocated for every row of the local diagonal submatrix, and o_nz 4358 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4359 One way to choose d_nz and o_nz is to use the max nonzerors per local 4360 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4361 In this case, the values of d_nz,o_nz are 4362 .vb 4363 proc0 : dnz = 2, o_nz = 2 4364 proc1 : dnz = 3, o_nz = 2 4365 proc2 : dnz = 1, o_nz = 4 4366 .ve 4367 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4368 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4369 for proc3. i.e we are using 12+15+10=37 storage locations to store 4370 34 values. 4371 4372 When d_nnz, o_nnz parameters are specified, the storage is specified 4373 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4374 In the above case the values for d_nnz,o_nnz are 4375 .vb 4376 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4377 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4378 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4379 .ve 4380 Here the space allocated is sum of all the above values i.e 34, and 4381 hence pre-allocation is perfect. 4382 4383 Level: intermediate 4384 4385 .keywords: matrix, aij, compressed row, sparse, parallel 4386 4387 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4388 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4389 @*/ 4390 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4391 { 4392 PetscErrorCode ierr; 4393 PetscMPIInt size; 4394 4395 PetscFunctionBegin; 4396 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4397 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4398 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4399 if (size > 1) { 4400 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4401 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4402 } else { 4403 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4404 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4405 } 4406 PetscFunctionReturn(0); 4407 } 4408 4409 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4410 { 4411 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4412 PetscBool flg; 4413 PetscErrorCode ierr; 4414 4415 PetscFunctionBegin; 4416 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4417 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4418 if (Ad) *Ad = a->A; 4419 if (Ao) *Ao = a->B; 4420 if (colmap) *colmap = a->garray; 4421 PetscFunctionReturn(0); 4422 } 4423 4424 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4425 { 4426 PetscErrorCode ierr; 4427 PetscInt m,N,i,rstart,nnz,Ii; 4428 PetscInt *indx; 4429 PetscScalar *values; 4430 4431 PetscFunctionBegin; 4432 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4433 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4434 PetscInt *dnz,*onz,sum,bs,cbs; 4435 4436 if (n == PETSC_DECIDE) { 4437 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4438 } 4439 /* Check sum(n) = N */ 4440 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4441 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4442 4443 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4444 rstart -= m; 4445 4446 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4447 for (i=0; i<m; i++) { 4448 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4449 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4450 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4451 } 4452 4453 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4454 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4455 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4456 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4457 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4458 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4459 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4460 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4461 } 4462 4463 /* numeric phase */ 4464 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4465 for (i=0; i<m; i++) { 4466 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4467 Ii = i + rstart; 4468 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4469 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4470 } 4471 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4472 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4473 PetscFunctionReturn(0); 4474 } 4475 4476 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4477 { 4478 PetscErrorCode ierr; 4479 PetscMPIInt rank; 4480 PetscInt m,N,i,rstart,nnz; 4481 size_t len; 4482 const PetscInt *indx; 4483 PetscViewer out; 4484 char *name; 4485 Mat B; 4486 const PetscScalar *values; 4487 4488 PetscFunctionBegin; 4489 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4490 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4491 /* Should this be the type of the diagonal block of A? */ 4492 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4493 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4494 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4495 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4496 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4497 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4498 for (i=0; i<m; i++) { 4499 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4500 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4501 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4502 } 4503 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4504 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4505 4506 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4507 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4508 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4509 sprintf(name,"%s.%d",outfile,rank); 4510 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4511 ierr = PetscFree(name);CHKERRQ(ierr); 4512 ierr = MatView(B,out);CHKERRQ(ierr); 4513 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4514 ierr = MatDestroy(&B);CHKERRQ(ierr); 4515 PetscFunctionReturn(0); 4516 } 4517 4518 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4519 { 4520 PetscErrorCode ierr; 4521 Mat_Merge_SeqsToMPI *merge; 4522 PetscContainer container; 4523 4524 PetscFunctionBegin; 4525 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4526 if (container) { 4527 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4532 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4533 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4534 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4535 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4536 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4537 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4538 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4539 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4540 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4541 ierr = PetscFree(merge);CHKERRQ(ierr); 4542 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4543 } 4544 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4545 PetscFunctionReturn(0); 4546 } 4547 4548 #include <../src/mat/utils/freespace.h> 4549 #include <petscbt.h> 4550 4551 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4552 { 4553 PetscErrorCode ierr; 4554 MPI_Comm comm; 4555 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4556 PetscMPIInt size,rank,taga,*len_s; 4557 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4558 PetscInt proc,m; 4559 PetscInt **buf_ri,**buf_rj; 4560 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4561 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4562 MPI_Request *s_waits,*r_waits; 4563 MPI_Status *status; 4564 MatScalar *aa=a->a; 4565 MatScalar **abuf_r,*ba_i; 4566 Mat_Merge_SeqsToMPI *merge; 4567 PetscContainer container; 4568 4569 PetscFunctionBegin; 4570 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4571 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4572 4573 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4574 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4575 4576 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4577 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4578 4579 bi = merge->bi; 4580 bj = merge->bj; 4581 buf_ri = merge->buf_ri; 4582 buf_rj = merge->buf_rj; 4583 4584 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4585 owners = merge->rowmap->range; 4586 len_s = merge->len_s; 4587 4588 /* send and recv matrix values */ 4589 /*-----------------------------*/ 4590 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4591 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4592 4593 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4594 for (proc=0,k=0; proc<size; proc++) { 4595 if (!len_s[proc]) continue; 4596 i = owners[proc]; 4597 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4598 k++; 4599 } 4600 4601 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4602 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4603 ierr = PetscFree(status);CHKERRQ(ierr); 4604 4605 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4606 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4607 4608 /* insert mat values of mpimat */ 4609 /*----------------------------*/ 4610 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4611 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4612 4613 for (k=0; k<merge->nrecv; k++) { 4614 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4615 nrows = *(buf_ri_k[k]); 4616 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4617 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4618 } 4619 4620 /* set values of ba */ 4621 m = merge->rowmap->n; 4622 for (i=0; i<m; i++) { 4623 arow = owners[rank] + i; 4624 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4625 bnzi = bi[i+1] - bi[i]; 4626 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4627 4628 /* add local non-zero vals of this proc's seqmat into ba */ 4629 anzi = ai[arow+1] - ai[arow]; 4630 aj = a->j + ai[arow]; 4631 aa = a->a + ai[arow]; 4632 nextaj = 0; 4633 for (j=0; nextaj<anzi; j++) { 4634 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4635 ba_i[j] += aa[nextaj++]; 4636 } 4637 } 4638 4639 /* add received vals into ba */ 4640 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4641 /* i-th row */ 4642 if (i == *nextrow[k]) { 4643 anzi = *(nextai[k]+1) - *nextai[k]; 4644 aj = buf_rj[k] + *(nextai[k]); 4645 aa = abuf_r[k] + *(nextai[k]); 4646 nextaj = 0; 4647 for (j=0; nextaj<anzi; j++) { 4648 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4649 ba_i[j] += aa[nextaj++]; 4650 } 4651 } 4652 nextrow[k]++; nextai[k]++; 4653 } 4654 } 4655 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4656 } 4657 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4658 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4659 4660 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4661 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4662 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4663 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4664 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4665 PetscFunctionReturn(0); 4666 } 4667 4668 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4669 { 4670 PetscErrorCode ierr; 4671 Mat B_mpi; 4672 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4673 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4674 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4675 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4676 PetscInt len,proc,*dnz,*onz,bs,cbs; 4677 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4678 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4679 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4680 MPI_Status *status; 4681 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4682 PetscBT lnkbt; 4683 Mat_Merge_SeqsToMPI *merge; 4684 PetscContainer container; 4685 4686 PetscFunctionBegin; 4687 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4688 4689 /* make sure it is a PETSc comm */ 4690 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4691 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4692 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4693 4694 ierr = PetscNew(&merge);CHKERRQ(ierr); 4695 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4696 4697 /* determine row ownership */ 4698 /*---------------------------------------------------------*/ 4699 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4700 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4701 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4702 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4703 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4704 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4705 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4706 4707 m = merge->rowmap->n; 4708 owners = merge->rowmap->range; 4709 4710 /* determine the number of messages to send, their lengths */ 4711 /*---------------------------------------------------------*/ 4712 len_s = merge->len_s; 4713 4714 len = 0; /* length of buf_si[] */ 4715 merge->nsend = 0; 4716 for (proc=0; proc<size; proc++) { 4717 len_si[proc] = 0; 4718 if (proc == rank) { 4719 len_s[proc] = 0; 4720 } else { 4721 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4722 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4723 } 4724 if (len_s[proc]) { 4725 merge->nsend++; 4726 nrows = 0; 4727 for (i=owners[proc]; i<owners[proc+1]; i++) { 4728 if (ai[i+1] > ai[i]) nrows++; 4729 } 4730 len_si[proc] = 2*(nrows+1); 4731 len += len_si[proc]; 4732 } 4733 } 4734 4735 /* determine the number and length of messages to receive for ij-structure */ 4736 /*-------------------------------------------------------------------------*/ 4737 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4738 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4739 4740 /* post the Irecv of j-structure */ 4741 /*-------------------------------*/ 4742 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4743 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4744 4745 /* post the Isend of j-structure */ 4746 /*--------------------------------*/ 4747 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4748 4749 for (proc=0, k=0; proc<size; proc++) { 4750 if (!len_s[proc]) continue; 4751 i = owners[proc]; 4752 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4753 k++; 4754 } 4755 4756 /* receives and sends of j-structure are complete */ 4757 /*------------------------------------------------*/ 4758 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4759 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4760 4761 /* send and recv i-structure */ 4762 /*---------------------------*/ 4763 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4764 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4765 4766 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4767 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4768 for (proc=0,k=0; proc<size; proc++) { 4769 if (!len_s[proc]) continue; 4770 /* form outgoing message for i-structure: 4771 buf_si[0]: nrows to be sent 4772 [1:nrows]: row index (global) 4773 [nrows+1:2*nrows+1]: i-structure index 4774 */ 4775 /*-------------------------------------------*/ 4776 nrows = len_si[proc]/2 - 1; 4777 buf_si_i = buf_si + nrows+1; 4778 buf_si[0] = nrows; 4779 buf_si_i[0] = 0; 4780 nrows = 0; 4781 for (i=owners[proc]; i<owners[proc+1]; i++) { 4782 anzi = ai[i+1] - ai[i]; 4783 if (anzi) { 4784 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4785 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4786 nrows++; 4787 } 4788 } 4789 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4790 k++; 4791 buf_si += len_si[proc]; 4792 } 4793 4794 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4795 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4796 4797 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4798 for (i=0; i<merge->nrecv; i++) { 4799 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4800 } 4801 4802 ierr = PetscFree(len_si);CHKERRQ(ierr); 4803 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4804 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4805 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4806 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4807 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4808 ierr = PetscFree(status);CHKERRQ(ierr); 4809 4810 /* compute a local seq matrix in each processor */ 4811 /*----------------------------------------------*/ 4812 /* allocate bi array and free space for accumulating nonzero column info */ 4813 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4814 bi[0] = 0; 4815 4816 /* create and initialize a linked list */ 4817 nlnk = N+1; 4818 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4819 4820 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4821 len = ai[owners[rank+1]] - ai[owners[rank]]; 4822 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4823 4824 current_space = free_space; 4825 4826 /* determine symbolic info for each local row */ 4827 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4828 4829 for (k=0; k<merge->nrecv; k++) { 4830 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4831 nrows = *buf_ri_k[k]; 4832 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4833 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4834 } 4835 4836 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4837 len = 0; 4838 for (i=0; i<m; i++) { 4839 bnzi = 0; 4840 /* add local non-zero cols of this proc's seqmat into lnk */ 4841 arow = owners[rank] + i; 4842 anzi = ai[arow+1] - ai[arow]; 4843 aj = a->j + ai[arow]; 4844 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4845 bnzi += nlnk; 4846 /* add received col data into lnk */ 4847 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4848 if (i == *nextrow[k]) { /* i-th row */ 4849 anzi = *(nextai[k]+1) - *nextai[k]; 4850 aj = buf_rj[k] + *nextai[k]; 4851 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4852 bnzi += nlnk; 4853 nextrow[k]++; nextai[k]++; 4854 } 4855 } 4856 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4857 4858 /* if free space is not available, make more free space */ 4859 if (current_space->local_remaining<bnzi) { 4860 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4861 nspacedouble++; 4862 } 4863 /* copy data into free space, then initialize lnk */ 4864 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4865 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4866 4867 current_space->array += bnzi; 4868 current_space->local_used += bnzi; 4869 current_space->local_remaining -= bnzi; 4870 4871 bi[i+1] = bi[i] + bnzi; 4872 } 4873 4874 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4875 4876 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4877 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4878 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4879 4880 /* create symbolic parallel matrix B_mpi */ 4881 /*---------------------------------------*/ 4882 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4883 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4884 if (n==PETSC_DECIDE) { 4885 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4886 } else { 4887 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4888 } 4889 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4890 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4891 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4892 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4893 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4894 4895 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4896 B_mpi->assembled = PETSC_FALSE; 4897 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4898 merge->bi = bi; 4899 merge->bj = bj; 4900 merge->buf_ri = buf_ri; 4901 merge->buf_rj = buf_rj; 4902 merge->coi = NULL; 4903 merge->coj = NULL; 4904 merge->owners_co = NULL; 4905 4906 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4907 4908 /* attach the supporting struct to B_mpi for reuse */ 4909 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4910 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4911 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4912 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4913 *mpimat = B_mpi; 4914 4915 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4916 PetscFunctionReturn(0); 4917 } 4918 4919 /*@C 4920 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4921 matrices from each processor 4922 4923 Collective on MPI_Comm 4924 4925 Input Parameters: 4926 + comm - the communicators the parallel matrix will live on 4927 . seqmat - the input sequential matrices 4928 . m - number of local rows (or PETSC_DECIDE) 4929 . n - number of local columns (or PETSC_DECIDE) 4930 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4931 4932 Output Parameter: 4933 . mpimat - the parallel matrix generated 4934 4935 Level: advanced 4936 4937 Notes: 4938 The dimensions of the sequential matrix in each processor MUST be the same. 4939 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4940 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4941 @*/ 4942 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4943 { 4944 PetscErrorCode ierr; 4945 PetscMPIInt size; 4946 4947 PetscFunctionBegin; 4948 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4949 if (size == 1) { 4950 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4951 if (scall == MAT_INITIAL_MATRIX) { 4952 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4953 } else { 4954 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4955 } 4956 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4957 PetscFunctionReturn(0); 4958 } 4959 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4960 if (scall == MAT_INITIAL_MATRIX) { 4961 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4962 } 4963 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4964 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4965 PetscFunctionReturn(0); 4966 } 4967 4968 /*@ 4969 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4970 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4971 with MatGetSize() 4972 4973 Not Collective 4974 4975 Input Parameters: 4976 + A - the matrix 4977 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4978 4979 Output Parameter: 4980 . A_loc - the local sequential matrix generated 4981 4982 Level: developer 4983 4984 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4985 4986 @*/ 4987 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4988 { 4989 PetscErrorCode ierr; 4990 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4991 Mat_SeqAIJ *mat,*a,*b; 4992 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4993 MatScalar *aa,*ba,*cam; 4994 PetscScalar *ca; 4995 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4996 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4997 PetscBool match; 4998 MPI_Comm comm; 4999 PetscMPIInt size; 5000 5001 PetscFunctionBegin; 5002 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5003 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5004 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5005 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5006 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5007 5008 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5009 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5010 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5011 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5012 aa = a->a; ba = b->a; 5013 if (scall == MAT_INITIAL_MATRIX) { 5014 if (size == 1) { 5015 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5016 PetscFunctionReturn(0); 5017 } 5018 5019 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5020 ci[0] = 0; 5021 for (i=0; i<am; i++) { 5022 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5023 } 5024 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5025 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5026 k = 0; 5027 for (i=0; i<am; i++) { 5028 ncols_o = bi[i+1] - bi[i]; 5029 ncols_d = ai[i+1] - ai[i]; 5030 /* off-diagonal portion of A */ 5031 for (jo=0; jo<ncols_o; jo++) { 5032 col = cmap[*bj]; 5033 if (col >= cstart) break; 5034 cj[k] = col; bj++; 5035 ca[k++] = *ba++; 5036 } 5037 /* diagonal portion of A */ 5038 for (j=0; j<ncols_d; j++) { 5039 cj[k] = cstart + *aj++; 5040 ca[k++] = *aa++; 5041 } 5042 /* off-diagonal portion of A */ 5043 for (j=jo; j<ncols_o; j++) { 5044 cj[k] = cmap[*bj++]; 5045 ca[k++] = *ba++; 5046 } 5047 } 5048 /* put together the new matrix */ 5049 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5050 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5051 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5052 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5053 mat->free_a = PETSC_TRUE; 5054 mat->free_ij = PETSC_TRUE; 5055 mat->nonew = 0; 5056 } else if (scall == MAT_REUSE_MATRIX) { 5057 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5058 ci = mat->i; cj = mat->j; cam = mat->a; 5059 for (i=0; i<am; i++) { 5060 /* off-diagonal portion of A */ 5061 ncols_o = bi[i+1] - bi[i]; 5062 for (jo=0; jo<ncols_o; jo++) { 5063 col = cmap[*bj]; 5064 if (col >= cstart) break; 5065 *cam++ = *ba++; bj++; 5066 } 5067 /* diagonal portion of A */ 5068 ncols_d = ai[i+1] - ai[i]; 5069 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5070 /* off-diagonal portion of A */ 5071 for (j=jo; j<ncols_o; j++) { 5072 *cam++ = *ba++; bj++; 5073 } 5074 } 5075 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5076 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5077 PetscFunctionReturn(0); 5078 } 5079 5080 /*@C 5081 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5082 5083 Not Collective 5084 5085 Input Parameters: 5086 + A - the matrix 5087 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5088 - row, col - index sets of rows and columns to extract (or NULL) 5089 5090 Output Parameter: 5091 . A_loc - the local sequential matrix generated 5092 5093 Level: developer 5094 5095 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5096 5097 @*/ 5098 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5099 { 5100 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5101 PetscErrorCode ierr; 5102 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5103 IS isrowa,iscola; 5104 Mat *aloc; 5105 PetscBool match; 5106 5107 PetscFunctionBegin; 5108 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5109 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5110 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5111 if (!row) { 5112 start = A->rmap->rstart; end = A->rmap->rend; 5113 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5114 } else { 5115 isrowa = *row; 5116 } 5117 if (!col) { 5118 start = A->cmap->rstart; 5119 cmap = a->garray; 5120 nzA = a->A->cmap->n; 5121 nzB = a->B->cmap->n; 5122 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5123 ncols = 0; 5124 for (i=0; i<nzB; i++) { 5125 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5126 else break; 5127 } 5128 imark = i; 5129 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5130 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5131 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5132 } else { 5133 iscola = *col; 5134 } 5135 if (scall != MAT_INITIAL_MATRIX) { 5136 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5137 aloc[0] = *A_loc; 5138 } 5139 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5140 if (!col) { /* attach global id of condensed columns */ 5141 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5142 } 5143 *A_loc = aloc[0]; 5144 ierr = PetscFree(aloc);CHKERRQ(ierr); 5145 if (!row) { 5146 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5147 } 5148 if (!col) { 5149 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5150 } 5151 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5152 PetscFunctionReturn(0); 5153 } 5154 5155 /*@C 5156 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5157 5158 Collective on Mat 5159 5160 Input Parameters: 5161 + A,B - the matrices in mpiaij format 5162 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5163 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5164 5165 Output Parameter: 5166 + rowb, colb - index sets of rows and columns of B to extract 5167 - B_seq - the sequential matrix generated 5168 5169 Level: developer 5170 5171 @*/ 5172 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5173 { 5174 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5175 PetscErrorCode ierr; 5176 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5177 IS isrowb,iscolb; 5178 Mat *bseq=NULL; 5179 5180 PetscFunctionBegin; 5181 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5182 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5183 } 5184 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5185 5186 if (scall == MAT_INITIAL_MATRIX) { 5187 start = A->cmap->rstart; 5188 cmap = a->garray; 5189 nzA = a->A->cmap->n; 5190 nzB = a->B->cmap->n; 5191 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5192 ncols = 0; 5193 for (i=0; i<nzB; i++) { /* row < local row index */ 5194 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5195 else break; 5196 } 5197 imark = i; 5198 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5199 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5200 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5201 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5202 } else { 5203 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5204 isrowb = *rowb; iscolb = *colb; 5205 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5206 bseq[0] = *B_seq; 5207 } 5208 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5209 *B_seq = bseq[0]; 5210 ierr = PetscFree(bseq);CHKERRQ(ierr); 5211 if (!rowb) { 5212 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5213 } else { 5214 *rowb = isrowb; 5215 } 5216 if (!colb) { 5217 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5218 } else { 5219 *colb = iscolb; 5220 } 5221 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5222 PetscFunctionReturn(0); 5223 } 5224 5225 /* 5226 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5227 of the OFF-DIAGONAL portion of local A 5228 5229 Collective on Mat 5230 5231 Input Parameters: 5232 + A,B - the matrices in mpiaij format 5233 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5234 5235 Output Parameter: 5236 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5237 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5238 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5239 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5240 5241 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5242 for this matrix. This is not desirable.. 5243 5244 Level: developer 5245 5246 */ 5247 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5248 { 5249 PetscErrorCode ierr; 5250 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5251 Mat_SeqAIJ *b_oth; 5252 VecScatter ctx; 5253 MPI_Comm comm; 5254 const PetscMPIInt *rprocs,*sprocs; 5255 const PetscInt *srow,*rstarts,*sstarts; 5256 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5257 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5258 PetscScalar *b_otha,*bufa,*bufA,*vals; 5259 MPI_Request *rwaits = NULL,*swaits = NULL; 5260 MPI_Status rstatus; 5261 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5262 5263 PetscFunctionBegin; 5264 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5265 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5266 5267 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5268 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5269 } 5270 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5272 5273 if (size == 1) { 5274 startsj_s = NULL; 5275 bufa_ptr = NULL; 5276 *B_oth = NULL; 5277 PetscFunctionReturn(0); 5278 } 5279 5280 ctx = a->Mvctx; 5281 tag = ((PetscObject)ctx)->tag; 5282 5283 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5284 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5285 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5286 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5287 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5288 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5289 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5290 5291 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5292 if (scall == MAT_INITIAL_MATRIX) { 5293 /* i-array */ 5294 /*---------*/ 5295 /* post receives */ 5296 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5297 for (i=0; i<nrecvs; i++) { 5298 rowlen = rvalues + rstarts[i]*rbs; 5299 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5300 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5301 } 5302 5303 /* pack the outgoing message */ 5304 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5305 5306 sstartsj[0] = 0; 5307 rstartsj[0] = 0; 5308 len = 0; /* total length of j or a array to be sent */ 5309 if (nsends) { 5310 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5311 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5312 } 5313 for (i=0; i<nsends; i++) { 5314 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5315 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5316 for (j=0; j<nrows; j++) { 5317 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5318 for (l=0; l<sbs; l++) { 5319 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5320 5321 rowlen[j*sbs+l] = ncols; 5322 5323 len += ncols; 5324 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5325 } 5326 k++; 5327 } 5328 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5329 5330 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5331 } 5332 /* recvs and sends of i-array are completed */ 5333 i = nrecvs; 5334 while (i--) { 5335 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5336 } 5337 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5338 ierr = PetscFree(svalues);CHKERRQ(ierr); 5339 5340 /* allocate buffers for sending j and a arrays */ 5341 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5342 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5343 5344 /* create i-array of B_oth */ 5345 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5346 5347 b_othi[0] = 0; 5348 len = 0; /* total length of j or a array to be received */ 5349 k = 0; 5350 for (i=0; i<nrecvs; i++) { 5351 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5352 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5353 for (j=0; j<nrows; j++) { 5354 b_othi[k+1] = b_othi[k] + rowlen[j]; 5355 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5356 k++; 5357 } 5358 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5359 } 5360 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5361 5362 /* allocate space for j and a arrrays of B_oth */ 5363 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5364 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5365 5366 /* j-array */ 5367 /*---------*/ 5368 /* post receives of j-array */ 5369 for (i=0; i<nrecvs; i++) { 5370 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5371 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5372 } 5373 5374 /* pack the outgoing message j-array */ 5375 if (nsends) k = sstarts[0]; 5376 for (i=0; i<nsends; i++) { 5377 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5378 bufJ = bufj+sstartsj[i]; 5379 for (j=0; j<nrows; j++) { 5380 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5381 for (ll=0; ll<sbs; ll++) { 5382 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5383 for (l=0; l<ncols; l++) { 5384 *bufJ++ = cols[l]; 5385 } 5386 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5387 } 5388 } 5389 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5390 } 5391 5392 /* recvs and sends of j-array are completed */ 5393 i = nrecvs; 5394 while (i--) { 5395 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5396 } 5397 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5398 } else if (scall == MAT_REUSE_MATRIX) { 5399 sstartsj = *startsj_s; 5400 rstartsj = *startsj_r; 5401 bufa = *bufa_ptr; 5402 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5403 b_otha = b_oth->a; 5404 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5405 5406 /* a-array */ 5407 /*---------*/ 5408 /* post receives of a-array */ 5409 for (i=0; i<nrecvs; i++) { 5410 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5411 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5412 } 5413 5414 /* pack the outgoing message a-array */ 5415 if (nsends) k = sstarts[0]; 5416 for (i=0; i<nsends; i++) { 5417 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5418 bufA = bufa+sstartsj[i]; 5419 for (j=0; j<nrows; j++) { 5420 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5421 for (ll=0; ll<sbs; ll++) { 5422 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5423 for (l=0; l<ncols; l++) { 5424 *bufA++ = vals[l]; 5425 } 5426 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5427 } 5428 } 5429 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5430 } 5431 /* recvs and sends of a-array are completed */ 5432 i = nrecvs; 5433 while (i--) { 5434 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5435 } 5436 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5437 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5438 5439 if (scall == MAT_INITIAL_MATRIX) { 5440 /* put together the new matrix */ 5441 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5442 5443 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5444 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5445 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5446 b_oth->free_a = PETSC_TRUE; 5447 b_oth->free_ij = PETSC_TRUE; 5448 b_oth->nonew = 0; 5449 5450 ierr = PetscFree(bufj);CHKERRQ(ierr); 5451 if (!startsj_s || !bufa_ptr) { 5452 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5453 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5454 } else { 5455 *startsj_s = sstartsj; 5456 *startsj_r = rstartsj; 5457 *bufa_ptr = bufa; 5458 } 5459 } 5460 5461 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5462 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5463 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5464 PetscFunctionReturn(0); 5465 } 5466 5467 /*@C 5468 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5469 5470 Not Collective 5471 5472 Input Parameters: 5473 . A - The matrix in mpiaij format 5474 5475 Output Parameter: 5476 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5477 . colmap - A map from global column index to local index into lvec 5478 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5479 5480 Level: developer 5481 5482 @*/ 5483 #if defined(PETSC_USE_CTABLE) 5484 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5485 #else 5486 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5487 #endif 5488 { 5489 Mat_MPIAIJ *a; 5490 5491 PetscFunctionBegin; 5492 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5493 PetscValidPointer(lvec, 2); 5494 PetscValidPointer(colmap, 3); 5495 PetscValidPointer(multScatter, 4); 5496 a = (Mat_MPIAIJ*) A->data; 5497 if (lvec) *lvec = a->lvec; 5498 if (colmap) *colmap = a->colmap; 5499 if (multScatter) *multScatter = a->Mvctx; 5500 PetscFunctionReturn(0); 5501 } 5502 5503 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5504 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5505 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5506 #if defined(PETSC_HAVE_MKL_SPARSE) 5507 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5508 #endif 5509 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5510 #if defined(PETSC_HAVE_ELEMENTAL) 5511 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5512 #endif 5513 #if defined(PETSC_HAVE_HYPRE) 5514 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5515 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5516 #endif 5517 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5518 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5519 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5520 5521 /* 5522 Computes (B'*A')' since computing B*A directly is untenable 5523 5524 n p p 5525 ( ) ( ) ( ) 5526 m ( A ) * n ( B ) = m ( C ) 5527 ( ) ( ) ( ) 5528 5529 */ 5530 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5531 { 5532 PetscErrorCode ierr; 5533 Mat At,Bt,Ct; 5534 5535 PetscFunctionBegin; 5536 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5537 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5538 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5539 ierr = MatDestroy(&At);CHKERRQ(ierr); 5540 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5541 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5542 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5543 PetscFunctionReturn(0); 5544 } 5545 5546 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5547 { 5548 PetscErrorCode ierr; 5549 PetscInt m=A->rmap->n,n=B->cmap->n; 5550 Mat Cmat; 5551 5552 PetscFunctionBegin; 5553 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5554 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5555 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5556 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5557 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5558 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5559 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5560 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5561 5562 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5563 5564 *C = Cmat; 5565 PetscFunctionReturn(0); 5566 } 5567 5568 /* ----------------------------------------------------------------*/ 5569 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5570 { 5571 PetscErrorCode ierr; 5572 5573 PetscFunctionBegin; 5574 if (scall == MAT_INITIAL_MATRIX) { 5575 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5576 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5577 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5578 } 5579 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5580 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5581 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5582 PetscFunctionReturn(0); 5583 } 5584 5585 /*MC 5586 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5587 5588 Options Database Keys: 5589 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5590 5591 Level: beginner 5592 5593 .seealso: MatCreateAIJ() 5594 M*/ 5595 5596 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5597 { 5598 Mat_MPIAIJ *b; 5599 PetscErrorCode ierr; 5600 PetscMPIInt size; 5601 5602 PetscFunctionBegin; 5603 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5604 5605 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5606 B->data = (void*)b; 5607 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5608 B->assembled = PETSC_FALSE; 5609 B->insertmode = NOT_SET_VALUES; 5610 b->size = size; 5611 5612 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5613 5614 /* build cache for off array entries formed */ 5615 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5616 5617 b->donotstash = PETSC_FALSE; 5618 b->colmap = 0; 5619 b->garray = 0; 5620 b->roworiented = PETSC_TRUE; 5621 5622 /* stuff used for matrix vector multiply */ 5623 b->lvec = NULL; 5624 b->Mvctx = NULL; 5625 5626 /* stuff for MatGetRow() */ 5627 b->rowindices = 0; 5628 b->rowvalues = 0; 5629 b->getrowactive = PETSC_FALSE; 5630 5631 /* flexible pointer used in CUSP/CUSPARSE classes */ 5632 b->spptr = NULL; 5633 5634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5635 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5636 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5637 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5638 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5639 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5640 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5644 #if defined(PETSC_HAVE_MKL_SPARSE) 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5646 #endif 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5648 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5649 #if defined(PETSC_HAVE_ELEMENTAL) 5650 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5651 #endif 5652 #if defined(PETSC_HAVE_HYPRE) 5653 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5654 #endif 5655 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5656 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5657 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5658 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5659 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5660 #if defined(PETSC_HAVE_HYPRE) 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5662 #endif 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5664 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5665 PetscFunctionReturn(0); 5666 } 5667 5668 /*@C 5669 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5670 and "off-diagonal" part of the matrix in CSR format. 5671 5672 Collective on MPI_Comm 5673 5674 Input Parameters: 5675 + comm - MPI communicator 5676 . m - number of local rows (Cannot be PETSC_DECIDE) 5677 . n - This value should be the same as the local size used in creating the 5678 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5679 calculated if N is given) For square matrices n is almost always m. 5680 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5681 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5682 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5683 . j - column indices 5684 . a - matrix values 5685 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5686 . oj - column indices 5687 - oa - matrix values 5688 5689 Output Parameter: 5690 . mat - the matrix 5691 5692 Level: advanced 5693 5694 Notes: 5695 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5696 must free the arrays once the matrix has been destroyed and not before. 5697 5698 The i and j indices are 0 based 5699 5700 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5701 5702 This sets local rows and cannot be used to set off-processor values. 5703 5704 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5705 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5706 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5707 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5708 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5709 communication if it is known that only local entries will be set. 5710 5711 .keywords: matrix, aij, compressed row, sparse, parallel 5712 5713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5714 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5715 @*/ 5716 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5717 { 5718 PetscErrorCode ierr; 5719 Mat_MPIAIJ *maij; 5720 5721 PetscFunctionBegin; 5722 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5723 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5724 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5725 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5726 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5727 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5728 maij = (Mat_MPIAIJ*) (*mat)->data; 5729 5730 (*mat)->preallocated = PETSC_TRUE; 5731 5732 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5733 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5734 5735 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5736 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5737 5738 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5739 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5740 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5741 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5742 5743 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5744 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5745 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5746 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5747 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5748 PetscFunctionReturn(0); 5749 } 5750 5751 /* 5752 Special version for direct calls from Fortran 5753 */ 5754 #include <petsc/private/fortranimpl.h> 5755 5756 /* Change these macros so can be used in void function */ 5757 #undef CHKERRQ 5758 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5759 #undef SETERRQ2 5760 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5761 #undef SETERRQ3 5762 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5763 #undef SETERRQ 5764 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5765 5766 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5767 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5768 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5769 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5770 #else 5771 #endif 5772 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5773 { 5774 Mat mat = *mmat; 5775 PetscInt m = *mm, n = *mn; 5776 InsertMode addv = *maddv; 5777 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5778 PetscScalar value; 5779 PetscErrorCode ierr; 5780 5781 MatCheckPreallocated(mat,1); 5782 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5783 5784 #if defined(PETSC_USE_DEBUG) 5785 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5786 #endif 5787 { 5788 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5789 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5790 PetscBool roworiented = aij->roworiented; 5791 5792 /* Some Variables required in the macro */ 5793 Mat A = aij->A; 5794 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5795 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5796 MatScalar *aa = a->a; 5797 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5798 Mat B = aij->B; 5799 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5800 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5801 MatScalar *ba = b->a; 5802 5803 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5804 PetscInt nonew = a->nonew; 5805 MatScalar *ap1,*ap2; 5806 5807 PetscFunctionBegin; 5808 for (i=0; i<m; i++) { 5809 if (im[i] < 0) continue; 5810 #if defined(PETSC_USE_DEBUG) 5811 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5812 #endif 5813 if (im[i] >= rstart && im[i] < rend) { 5814 row = im[i] - rstart; 5815 lastcol1 = -1; 5816 rp1 = aj + ai[row]; 5817 ap1 = aa + ai[row]; 5818 rmax1 = aimax[row]; 5819 nrow1 = ailen[row]; 5820 low1 = 0; 5821 high1 = nrow1; 5822 lastcol2 = -1; 5823 rp2 = bj + bi[row]; 5824 ap2 = ba + bi[row]; 5825 rmax2 = bimax[row]; 5826 nrow2 = bilen[row]; 5827 low2 = 0; 5828 high2 = nrow2; 5829 5830 for (j=0; j<n; j++) { 5831 if (roworiented) value = v[i*n+j]; 5832 else value = v[i+j*m]; 5833 if (in[j] >= cstart && in[j] < cend) { 5834 col = in[j] - cstart; 5835 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5836 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5837 } else if (in[j] < 0) continue; 5838 #if defined(PETSC_USE_DEBUG) 5839 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5840 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5841 #endif 5842 else { 5843 if (mat->was_assembled) { 5844 if (!aij->colmap) { 5845 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5846 } 5847 #if defined(PETSC_USE_CTABLE) 5848 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5849 col--; 5850 #else 5851 col = aij->colmap[in[j]] - 1; 5852 #endif 5853 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5854 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5855 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5856 col = in[j]; 5857 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5858 B = aij->B; 5859 b = (Mat_SeqAIJ*)B->data; 5860 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5861 rp2 = bj + bi[row]; 5862 ap2 = ba + bi[row]; 5863 rmax2 = bimax[row]; 5864 nrow2 = bilen[row]; 5865 low2 = 0; 5866 high2 = nrow2; 5867 bm = aij->B->rmap->n; 5868 ba = b->a; 5869 } 5870 } else col = in[j]; 5871 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5872 } 5873 } 5874 } else if (!aij->donotstash) { 5875 if (roworiented) { 5876 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5877 } else { 5878 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5879 } 5880 } 5881 } 5882 } 5883 PetscFunctionReturnVoid(); 5884 } 5885