#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type
   also automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
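/*
   Example usage (an illustrative sketch, not part of this file): creating an AIJ matrix and
   calling both preallocation routines as recommended above, so the same code works on one
   process or many. Error checking is omitted; M, N, d_nnz, and o_nnz are hypothetical
   global sizes and per-row nonzero counts supplied by the caller.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,0,d_nnz);          (honored on a one-process communicator)
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);  (honored on a multi-process communicator)
*/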
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
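/*
   Example usage (an illustrative sketch): retrieving the locally owned rows of A that contain
   at least one stored nonzero value. Note the routine above returns a NULL IS when no row,
   on any process, is entirely zero.

     IS keptrows;
     MatFindNonzeroRows(A,&keptrows);
     if (keptrows) {
       ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);
       ISDestroy(&keptrows);
     }
*/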
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
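/*
   Example usage (an illustrative sketch): computing the 2-norm of every column through the
   public interface that dispatches to the routine above. The output array must have length
   equal to the global number of columns; error checking is omitted.

     PetscReal *norms;
     PetscInt  N;
     MatGetSize(A,NULL,&N);
     PetscMalloc1(N,&norms);
     MatGetColumnNorms(A,NORM_2,norms);
     PetscFree(norms);
*/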
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processes */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr);
      gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
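/*
   Example usage (an illustrative sketch): distributing a square MATSEQAIJ matrix, whose
   numerical content matters only on rank 0, across a communicator. Here gmat and the local
   row count m are assumed to be supplied by the caller; a second call with MAT_REUSE_MATRIX
   moves over only the numerical values, as the code above shows.

     Mat dist;
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);
     ...
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);
*/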
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else                high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Unsure whether the PetscLogFlops() call will slow down the code */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else                high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        inserted = PETSC_TRUE; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
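/*
   Example usage (an illustrative sketch): inserting entries into a parallel AIJ matrix. A
   process may set entries in rows it does not own; those are stashed by the code above and
   communicated during assembly. The row and column indices shown are hypothetical.

     PetscInt    row = 0, cols[2] = {0, 1};
     PetscScalar vals[2] = {2.0, -1.0};
     MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/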
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
  Mat        A      = aij->A; /* diagonal part of the matrix */
  Mat        B      = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}
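/*
   Example usage (an illustrative sketch): the diagonal/off-diagonal classification performed
   above is also what the public constructor from local CSR arrays relies on. Here i, j, and a
   are hypothetical local CSR row pointers, global column indices, and values for the m
   locally owned rows with n local columns.

     Mat A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
*/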
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij   = (Mat_MPIAIJ*)mat->data;
  Mat         A      = aij->A; /* diagonal part of the matrix */
  Mat         B      = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b     = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
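/*
   Example usage (an illustrative sketch): reading back locally owned entries. As the error
   message above indicates, MatGetValues() on an MPIAIJ matrix supports only rows owned by
   the calling process. A square matrix is assumed so that the diagonal index is valid.

     PetscInt    row, col;
     PetscScalar val;
     MatGetOwnershipRange(A,&row,NULL);  (first locally owned row)
     col = row;
     MatGetValues(A,1,&row,1,&col,&val);
*/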
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_DEVICE)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ*)A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch  = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
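/*
   Example usage (an illustrative sketch): zeroing two hypothetical global rows, placing 1.0
   on their diagonal entries, and adjusting the right-hand side b so that the solution keeps
   the values given in x, as the loop above does. Pass NULL for x and b to skip the fix.

     PetscInt rows[2] = {0, 5};
     MatZeroRows(A,2,rows,1.0,x,b);
*/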
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layouts don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
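/*
   Example usage (an illustrative sketch): applying the matrix and its transpose. Vectors
   obtained from MatCreateVecs() automatically have layouts compatible with the matrix, which
   the size check in MatMult_MPIAIJ() above enforces. Note how MatMult_MPIAIJ() overlaps the
   scatter of off-process x entries with the local block multiply.

     Vec x, y;
     MatCreateVecs(A,&x,&y);  (x matches the column layout, y the row layout)
     VecSet(x,1.0);
     MatMult(A,x,y);          (y = A*x)
     MatMultTranspose(A,y,x); (x = A'*y)
*/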
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
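/*
   Example usage (an illustrative sketch): testing whether B equals the transpose of A to a
   tolerance. As above, the cheap test on the diagonal blocks runs first and the expensive
   off-diagonal comparison is attempted only if it passes.

     PetscBool flg;
     MatIsTranspose(A,B,1.e-10,&flg);
     MatIsSymmetric(A,1.e-10,&flg);  (for MPIAIJ this reduces to MatIsTranspose(A,A,...))
*/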
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt       *rowlens;
  PetscInt       *colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = B->a[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = A->a[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = B->a[jb];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
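/*
   Example usage (an illustrative sketch): saving the matrix in PETSc binary format; on more
   than one process MatView() ends up in the writer above. "A.dat" is a hypothetical file name.

     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);
     MatView(A,viewer);
     PetscViewerDestroy(&viewer);
*/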
#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*)(aij->A->data))->nz + ((Mat_SeqAIJ*)(aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax  = PetscMax(nmax,nz[i]);
        nmin  = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
1459 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1460 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1461 } else { 1462 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1463 } 1464 PetscFunctionReturn(0); 1465 } else if (iascii && size == 1) { 1466 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1467 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } else if (isdraw) { 1470 PetscDraw draw; 1471 PetscBool isnull; 1472 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1473 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1474 if (isnull) PetscFunctionReturn(0); 1475 } 1476 1477 { /* assemble the entire matrix onto first processor */ 1478 Mat A = NULL, Av; 1479 IS isrow,iscol; 1480 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1483 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1484 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1485 /* The commented code uses MatCreateSubMatrices instead */ 1486 /* 1487 Mat *AA, A = NULL, Av; 1488 IS isrow,iscol; 1489 1490 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1491 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1492 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1493 if (!rank) { 1494 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1495 A = AA[0]; 1496 Av = AA[0]; 1497 } 1498 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1499 */ 1500 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1501 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1502 /* 1503 Everyone has to call to draw the matrix since the graphics waits are 1504 synchronized across all processors that share the PetscDraw object 1505 */ 1506 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1507 if (!rank) { 1508 if (((PetscObject)mat)->name) { 1509 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1510 } 1511 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1512 } 1513 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1514 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1515 ierr = MatDestroy(&A);CHKERRQ(ierr); 1516 } 1517 PetscFunctionReturn(0); 1518 } 1519 1520 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1521 { 1522 PetscErrorCode ierr; 1523 PetscBool iascii,isdraw,issocket,isbinary; 1524 1525 PetscFunctionBegin; 1526 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1527 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1528 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1529 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1530 if (iascii || isdraw || isbinary || issocket) { 1531 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1532 } 1533 PetscFunctionReturn(0); 1534 } 1535 1536 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1537 { 1538 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1539 PetscErrorCode 
ierr; 1540 Vec bb1 = NULL; 1541 PetscBool hasop; 1542 1543 PetscFunctionBegin; 1544 if (flag == SOR_APPLY_UPPER) { 1545 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1550 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1551 } 1552 1553 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1554 if (flag & SOR_ZERO_INITIAL_GUESS) { 1555 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1556 its--; 1557 } 1558 1559 while (its--) { 1560 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1561 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1562 1563 /* update rhs: bb1 = bb - B*x */ 1564 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1565 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1566 1567 /* local sweep */ 1568 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1569 } 1570 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1571 if (flag & SOR_ZERO_INITIAL_GUESS) { 1572 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1573 its--; 1574 } 1575 while (its--) { 1576 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1577 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1578 1579 /* update rhs: bb1 = bb - B*x */ 1580 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1581 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1582 1583 /* local sweep */ 1584 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1585 } 1586 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1587 if (flag & SOR_ZERO_INITIAL_GUESS) { 1588 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1589 its--; 1590 } 1591 while (its--) { 1592 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1594 1595 /* update rhs: bb1 = bb - B*x */ 1596 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1597 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1598 1599 /* local sweep */ 1600 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1601 } 1602 } else if (flag & SOR_EISENSTAT) { 1603 Vec xx1; 1604 1605 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1607 1608 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 if (!mat->diag) { 1611 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1612 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1613 } 1614 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1615 if (hasop) { 1616 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1617 } else { 1618 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1619 } 1620 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1621 1622 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 
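      /* After the MatMultAdd() above,
             bb1 = bb + ((omega-2)/omega) * D * xx + B * xg,
         where D is the diagonal of the local block, B the off-process coupling,
         and xg the ghosted copy of xx gathered into mat->lvec.  This is the
         right-hand side Eisenstat's trick feeds to the local forward sweep
         below, so one SSOR-preconditioned application is obtained without
         explicitly forming (D/omega + L) D^{-1} (D/omega + U) (up to the usual
         (2-omega)/omega scaling). */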
1623 1624 /* local sweep */ 1625 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1626 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1627 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1628 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1629 1630 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1631 1632 matin->factorerrortype = mat->A->factorerrortype; 1633 PetscFunctionReturn(0); 1634 } 1635 1636 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1637 { 1638 Mat aA,aB,Aperm; 1639 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1640 PetscScalar *aa,*ba; 1641 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1642 PetscSF rowsf,sf; 1643 IS parcolp = NULL; 1644 PetscBool done; 1645 PetscErrorCode ierr; 1646 1647 PetscFunctionBegin; 1648 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1649 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1650 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1651 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1652 1653 /* Invert row permutation to find out where my rows should go */ 1654 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1655 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1656 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1657 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1658 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1660 1661 /* Invert column permutation to find out where my columns should go */ 1662 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1663 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1664 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1665 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1666 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1667 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1668 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1669 1670 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1671 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1672 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1673 1674 /* Find out where my gcols should go */ 1675 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1676 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1677 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1678 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1679 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1680 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1681 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1682 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1683 1684 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1685 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1686 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1687 for (i=0; i<m; i++) { 1688 PetscInt row = rdest[i]; 1689 PetscMPIInt rowner; 1690 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1691 for (j=ai[i]; j<ai[i+1]; j++) { 1692 PetscInt col = cdest[aj[j]]; 1693 PetscMPIInt cowner; 1694 ierr = 
PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1695 if (rowner == cowner) dnnz[i]++; 1696 else onnz[i]++; 1697 } 1698 for (j=bi[i]; j<bi[i+1]; j++) { 1699 PetscInt col = gcdest[bj[j]]; 1700 PetscMPIInt cowner; 1701 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1702 if (rowner == cowner) dnnz[i]++; 1703 else onnz[i]++; 1704 } 1705 } 1706 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1707 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1708 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1709 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1710 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1711 1712 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1713 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1714 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1715 for (i=0; i<m; i++) { 1716 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1717 PetscInt j0,rowlen; 1718 rowlen = ai[i+1] - ai[i]; 1719 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1720 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1721 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1722 } 1723 rowlen = bi[i+1] - bi[i]; 1724 for (j0=j=0; j<rowlen; j0=j) { 1725 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1726 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1727 } 1728 } 1729 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1730 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1731 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1732 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1733 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1734 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1735 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1736 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1737 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1738 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1739 *B = Aperm; 1740 PetscFunctionReturn(0); 1741 } 1742 1743 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1744 { 1745 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1746 PetscErrorCode ierr; 1747 1748 PetscFunctionBegin; 1749 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1750 if (ghosts) *ghosts = aij->garray; 1751 PetscFunctionReturn(0); 1752 } 1753 1754 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1755 { 1756 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1757 Mat A = mat->A,B = mat->B; 1758 PetscErrorCode ierr; 1759 PetscLogDouble isend[5],irecv[5]; 1760 1761 PetscFunctionBegin; 1762 info->block_size = 1.0; 1763 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1764 1765 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1766 isend[3] = info->memory; isend[4] = info->mallocs; 1767 1768 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1769 1770 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1771 isend[3] += info->memory; isend[4] += info->mallocs; 1772 if (flag == MAT_LOCAL) { 1773 info->nz_used = isend[0]; 
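    /* the local totals are the sums over the diagonal (A) and off-diagonal (B)
       blocks accumulated in isend[] above; illustrative caller-side sketch
       (not part of this source; error checking abbreviated):

           MatInfo info;
           MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
           PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);

       MAT_LOCAL reports this rank only, while MAT_GLOBAL_MAX and MAT_GLOBAL_SUM
       reduce the same five numbers across the communicator as in the branches below. */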
1774 info->nz_allocated = isend[1]; 1775 info->nz_unneeded = isend[2]; 1776 info->memory = isend[3]; 1777 info->mallocs = isend[4]; 1778 } else if (flag == MAT_GLOBAL_MAX) { 1779 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1780 1781 info->nz_used = irecv[0]; 1782 info->nz_allocated = irecv[1]; 1783 info->nz_unneeded = irecv[2]; 1784 info->memory = irecv[3]; 1785 info->mallocs = irecv[4]; 1786 } else if (flag == MAT_GLOBAL_SUM) { 1787 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1788 1789 info->nz_used = irecv[0]; 1790 info->nz_allocated = irecv[1]; 1791 info->nz_unneeded = irecv[2]; 1792 info->memory = irecv[3]; 1793 info->mallocs = irecv[4]; 1794 } 1795 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1796 info->fill_ratio_needed = 0; 1797 info->factor_mallocs = 0; 1798 PetscFunctionReturn(0); 1799 } 1800 1801 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1802 { 1803 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1804 PetscErrorCode ierr; 1805 1806 PetscFunctionBegin; 1807 switch (op) { 1808 case MAT_NEW_NONZERO_LOCATIONS: 1809 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1810 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1811 case MAT_KEEP_NONZERO_PATTERN: 1812 case MAT_NEW_NONZERO_LOCATION_ERR: 1813 case MAT_USE_INODES: 1814 case MAT_IGNORE_ZERO_ENTRIES: 1815 MatCheckPreallocated(A,1); 1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1817 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_ROW_ORIENTED: 1820 MatCheckPreallocated(A,1); 1821 a->roworiented = flg; 1822 1823 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1824 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1825 break; 1826 case MAT_FORCE_DIAGONAL_ENTRIES: 1827 case MAT_SORTED_FULL: 1828 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1829 break; 1830 case MAT_IGNORE_OFF_PROC_ENTRIES: 1831 a->donotstash = flg; 1832 break; 1833 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1834 case MAT_SPD: 1835 case MAT_SYMMETRIC: 1836 case MAT_STRUCTURALLY_SYMMETRIC: 1837 case MAT_HERMITIAN: 1838 case MAT_SYMMETRY_ETERNAL: 1839 break; 1840 case MAT_SUBMAT_SINGLEIS: 1841 A->submat_singleis = flg; 1842 break; 1843 case MAT_STRUCTURE_ONLY: 1844 /* The option is handled directly by MatSetOption() */ 1845 break; 1846 default: 1847 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1848 } 1849 PetscFunctionReturn(0); 1850 } 1851 1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1853 { 1854 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1855 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1856 PetscErrorCode ierr; 1857 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1858 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1859 PetscInt *cmap,*idx_p; 1860 1861 PetscFunctionBegin; 1862 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1863 mat->getrowactive = PETSC_TRUE; 1864 1865 if (!mat->rowvalues && (idx || v)) { 1866 /* 1867 allocate enough space to hold information from the longest row. 
1868 */ 1869 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1870 PetscInt max = 1,tmp; 1871 for (i=0; i<matin->rmap->n; i++) { 1872 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1873 if (max < tmp) max = tmp; 1874 } 1875 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1876 } 1877 1878 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1879 lrow = row - rstart; 1880 1881 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1882 if (!v) {pvA = NULL; pvB = NULL;} 1883 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1884 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1885 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1886 nztot = nzA + nzB; 1887 1888 cmap = mat->garray; 1889 if (v || idx) { 1890 if (nztot) { 1891 /* Sort by increasing column numbers, assuming A and B already sorted */ 1892 PetscInt imark = -1; 1893 if (v) { 1894 *v = v_p = mat->rowvalues; 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1897 else break; 1898 } 1899 imark = i; 1900 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1901 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1902 } 1903 if (idx) { 1904 *idx = idx_p = mat->rowindices; 1905 if (imark > -1) { 1906 for (i=0; i<imark; i++) { 1907 idx_p[i] = cmap[cworkB[i]]; 1908 } 1909 } else { 1910 for (i=0; i<nzB; i++) { 1911 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1912 else break; 1913 } 1914 imark = i; 1915 } 1916 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1917 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1918 } 1919 } else { 1920 if (idx) *idx = NULL; 1921 if (v) *v = NULL; 1922 } 1923 } 1924 *nz = nztot; 1925 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1926 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1931 { 1932 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1933 1934 PetscFunctionBegin; 1935 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1936 aij->getrowactive = PETSC_FALSE; 1937 PetscFunctionReturn(0); 1938 } 1939 1940 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1941 { 1942 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1943 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1944 PetscErrorCode ierr; 1945 PetscInt i,j,cstart = mat->cmap->rstart; 1946 PetscReal sum = 0.0; 1947 MatScalar *v; 1948 1949 PetscFunctionBegin; 1950 if (aij->size == 1) { 1951 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1952 } else { 1953 if (type == NORM_FROBENIUS) { 1954 v = amat->a; 1955 for (i=0; i<amat->nz; i++) { 1956 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1957 } 1958 v = bmat->a; 1959 for (i=0; i<bmat->nz; i++) { 1960 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1961 } 1962 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1963 *norm = PetscSqrtReal(*norm); 1964 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1965 } else if (type == NORM_1) { /* max column norm */ 1966 PetscReal *tmp,*tmp2; 1967 PetscInt *jj,*garray = aij->garray; 1968 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1969 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1970 *norm = 0.0; 1971 v = amat->a; jj = amat->j; 1972 for (j=0; j<amat->nz; j++) { 1973 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1974 } 1975 v = bmat->a; jj = bmat->j; 1976 for (j=0; j<bmat->nz; j++) { 1977 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1978 } 1979 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1980 for (j=0; j<mat->cmap->N; j++) { 1981 if (tmp2[j] > *norm) *norm = tmp2[j]; 1982 } 1983 ierr = PetscFree(tmp);CHKERRQ(ierr); 1984 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1985 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1986 } else if (type == NORM_INFINITY) { /* max row norm */ 1987 PetscReal ntemp = 0.0; 1988 for (j=0; j<aij->A->rmap->n; j++) { 1989 v = amat->a + amat->i[j]; 1990 sum = 0.0; 1991 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1992 sum += PetscAbsScalar(*v); v++; 1993 } 1994 v = bmat->a + bmat->i[j]; 1995 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1996 sum += PetscAbsScalar(*v); v++; 1997 } 1998 if (sum > ntemp) ntemp = sum; 1999 } 2000 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2001 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2002 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2003 } 2004 PetscFunctionReturn(0); 2005 } 2006 2007 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2008 { 2009 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2010 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2011 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2012 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2013 PetscErrorCode ierr; 2014 Mat B,A_diag,*B_diag; 2015 const MatScalar *array; 2016 2017 PetscFunctionBegin; 2018 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2019 ai = Aloc->i; aj = Aloc->j; 2020 bi = Bloc->i; bj = Bloc->j; 2021 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2022 PetscInt *d_nnz,*g_nnz,*o_nnz; 2023 PetscSFNode *oloc; 2024 PETSC_UNUSED PetscSF sf; 2025 2026 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2027 /* compute d_nnz for preallocation */ 2028 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2029 for (i=0; i<ai[ma]; i++) { 2030 d_nnz[aj[i]]++; 2031 } 2032 /* compute local off-diagonal contributions */ 2033 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2034 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2035 /* map those to global */ 2036 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2037 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2038 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2039 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2040 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2041 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2042 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2043 2044 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2045 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2046 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2047 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2048 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2049 ierr = 
PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2050 } else { 2051 B = *matout; 2052 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2053 } 2054 2055 b = (Mat_MPIAIJ*)B->data; 2056 A_diag = a->A; 2057 B_diag = &b->A; 2058 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2059 A_diag_ncol = A_diag->cmap->N; 2060 B_diag_ilen = sub_B_diag->ilen; 2061 B_diag_i = sub_B_diag->i; 2062 2063 /* Set ilen for diagonal of B */ 2064 for (i=0; i<A_diag_ncol; i++) { 2065 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2066 } 2067 2068 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2069 very quickly (=without using MatSetValues), because all writes are local. */ 2070 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2071 2072 /* copy over the B part */ 2073 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2074 array = Bloc->a; 2075 row = A->rmap->rstart; 2076 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2077 cols_tmp = cols; 2078 for (i=0; i<mb; i++) { 2079 ncol = bi[i+1]-bi[i]; 2080 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2081 row++; 2082 array += ncol; cols_tmp += ncol; 2083 } 2084 ierr = PetscFree(cols);CHKERRQ(ierr); 2085 2086 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2089 *matout = B; 2090 } else { 2091 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2092 } 2093 PetscFunctionReturn(0); 2094 } 2095 2096 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2097 { 2098 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2099 Mat a = aij->A,b = aij->B; 2100 PetscErrorCode ierr; 2101 PetscInt s1,s2,s3; 2102 2103 PetscFunctionBegin; 2104 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2105 if (rr) { 2106 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2107 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2108 /* Overlap communication with computation. 
*/ 2109 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2110 } 2111 if (ll) { 2112 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2113 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2114 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2115 } 2116 /* scale the diagonal block */ 2117 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2118 2119 if (rr) { 2120 /* Do a scatter end and then right scale the off-diagonal block */ 2121 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2122 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2123 } 2124 PetscFunctionReturn(0); 2125 } 2126 2127 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2128 { 2129 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2130 PetscErrorCode ierr; 2131 2132 PetscFunctionBegin; 2133 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2138 { 2139 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2140 Mat a,b,c,d; 2141 PetscBool flg; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 a = matA->A; b = matA->B; 2146 c = matB->A; d = matB->B; 2147 2148 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2149 if (flg) { 2150 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2151 } 2152 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2153 PetscFunctionReturn(0); 2154 } 2155 2156 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2157 { 2158 PetscErrorCode ierr; 2159 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2160 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2161 2162 PetscFunctionBegin; 2163 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2164 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2165 /* because of the column compression in the off-processor part of the matrix a->B, 2166 the number of columns in a->B and b->B may be different, hence we cannot call 2167 the MatCopy() directly on the two parts. If need be, we can provide a more 2168 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2169 then copying the submatrices */ 2170 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2171 } else { 2172 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2173 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2174 } 2175 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2180 { 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 /* 2189 Computes the number of nonzeros per row needed for preallocation when X and Y 2190 have different nonzero structure. 
2191 */ 2192 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2193 { 2194 PetscInt i,j,k,nzx,nzy; 2195 2196 PetscFunctionBegin; 2197 /* Set the number of nonzeros in the new matrix */ 2198 for (i=0; i<m; i++) { 2199 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2200 nzx = xi[i+1] - xi[i]; 2201 nzy = yi[i+1] - yi[i]; 2202 nnz[i] = 0; 2203 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2204 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2205 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2206 nnz[i]++; 2207 } 2208 for (; k<nzy; k++) nnz[i]++; 2209 } 2210 PetscFunctionReturn(0); 2211 } 2212 2213 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2214 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2215 { 2216 PetscErrorCode ierr; 2217 PetscInt m = Y->rmap->N; 2218 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2219 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2220 2221 PetscFunctionBegin; 2222 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2223 PetscFunctionReturn(0); 2224 } 2225 2226 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2227 { 2228 PetscErrorCode ierr; 2229 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2230 PetscBLASInt bnz,one=1; 2231 Mat_SeqAIJ *x,*y; 2232 2233 PetscFunctionBegin; 2234 if (str == SAME_NONZERO_PATTERN) { 2235 PetscScalar alpha = a; 2236 x = (Mat_SeqAIJ*)xx->A->data; 2237 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2238 y = (Mat_SeqAIJ*)yy->A->data; 2239 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2240 x = (Mat_SeqAIJ*)xx->B->data; 2241 y = (Mat_SeqAIJ*)yy->B->data; 2242 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2243 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2244 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2245 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2246 will be updated */ 2247 #if defined(PETSC_HAVE_DEVICE) 2248 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2249 Y->offloadmask = PETSC_OFFLOAD_CPU; 2250 } 2251 #endif 2252 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2253 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2254 } else { 2255 Mat B; 2256 PetscInt *nnz_d,*nnz_o; 2257 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2258 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2259 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2260 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2261 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2262 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2263 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2264 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2265 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2266 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2267 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2268 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2269 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2270 } 2271 PetscFunctionReturn(0); 2272 } 2273 2274 extern 
PetscErrorCode MatConjugate_SeqAIJ(Mat);

PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*idxb = NULL,m = A->rmap->n;
  PetscScalar       *va,*vv;
  Vec               vB,vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
  ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
        idx[i] = a->garray[idxb[i]];
    }
  }
  ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vA);CHKERRQ(ierr);
  ierr = VecDestroy(&vB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       m = A->rmap->n,n = A->cmap->n;
  PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA, *ba;
  PetscInt       r,j,col,ncols,*bi,*bj;
  PetscErrorCode ierr;
  Mat            B = mat->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When one process holds the entire A and the other processes have no entries */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0)
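 /* complementary degenerate layout: this rank owns rows but no columns of the
    diagonal block, so (consistent with the special case above) its rows are
    reported as containing only implicit zeros: value 0.0 and column index -1 */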
{ 2382 if (m) { 2383 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2384 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2385 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2386 } 2387 PetscFunctionReturn(0); 2388 } 2389 2390 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2391 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2392 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2393 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2394 2395 /* Get offdiagIdx[] for implicit 0.0 */ 2396 ba = b->a; 2397 bi = b->i; 2398 bj = b->j; 2399 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2400 for (r = 0; r < m; r++) { 2401 ncols = bi[r+1] - bi[r]; 2402 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2403 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2404 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2405 offdiagA[r] = 0.0; 2406 2407 /* Find first hole in the cmap */ 2408 for (j=0; j<ncols; j++) { 2409 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2410 if (col > j && j < cstart) { 2411 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2412 break; 2413 } else if (col > j + n && j >= cstart) { 2414 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2415 break; 2416 } 2417 } 2418 if (j == ncols && ncols < A->cmap->N - n) { 2419 /* a hole is outside compressed Bcols */ 2420 if (ncols == 0) { 2421 if (cstart) { 2422 offdiagIdx[r] = 0; 2423 } else offdiagIdx[r] = cend; 2424 } else { /* ncols > 0 */ 2425 offdiagIdx[r] = cmap[ncols-1] + 1; 2426 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2427 } 2428 } 2429 } 2430 2431 for (j=0; j<ncols; j++) { 2432 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2433 ba++; bj++; 2434 } 2435 } 2436 2437 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2438 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2439 for (r = 0; r < m; ++r) { 2440 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2441 a[r] = diagA[r]; 2442 if (idx) idx[r] = cstart + diagIdx[r]; 2443 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2444 a[r] = diagA[r]; 2445 if (idx) { 2446 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2447 idx[r] = cstart + diagIdx[r]; 2448 } else idx[r] = offdiagIdx[r]; 2449 } 2450 } else { 2451 a[r] = offdiagA[r]; 2452 if (idx) idx[r] = offdiagIdx[r]; 2453 } 2454 } 2455 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2456 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2457 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2458 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2459 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2460 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2465 { 2466 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2467 PetscInt m = A->rmap->n,n = A->cmap->n; 2468 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2469 PetscInt *cmap = mat->garray; 2470 PetscInt *diagIdx, *offdiagIdx; 2471 Vec diagV, offdiagV; 2472 PetscScalar *a, *diagA, *offdiagA, *ba; 2473 PetscInt r,j,col,ncols,*bi,*bj; 2474 PetscErrorCode ierr; 2475 Mat B = mat->B; 2476 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2477 2478 PetscFunctionBegin; 2479 /* When a process holds entire A and other processes have no entry */ 2480 if (A->cmap->N == n) { 2481 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2482 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2483 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2484 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2485 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2486 PetscFunctionReturn(0); 2487 } else if (n == 0) { 2488 if (m) { 2489 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2490 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2491 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2492 } 2493 PetscFunctionReturn(0); 2494 } 2495 2496 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2497 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2498 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2499 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2500 2501 /* Get offdiagIdx[] for implicit 0.0 */ 2502 ba = b->a; 2503 bi = b->i; 2504 bj = b->j; 2505 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2506 for (r = 0; r < m; r++) { 2507 ncols = bi[r+1] - bi[r]; 2508 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2509 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2510 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2511 offdiagA[r] = 0.0; 2512 2513 /* Find first hole in the cmap */ 2514 for (j=0; j<ncols; j++) { 2515 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2516 if (col > j && j < cstart) { 2517 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2518 break; 2519 } else if (col > j + n && j >= cstart) { 2520 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2521 break; 2522 } 2523 } 2524 if (j == ncols && ncols < A->cmap->N - n) { 2525 /* a hole is outside compressed Bcols */ 2526 if (ncols == 0) { 2527 if (cstart) { 2528 offdiagIdx[r] = 0; 2529 } else offdiagIdx[r] = cend; 2530 } else { /* ncols > 0 */ 2531 offdiagIdx[r] = cmap[ncols-1] + 1; 2532 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2533 } 2534 } 2535 } 2536 2537 for (j=0; j<ncols; j++) { 2538 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2539 ba++; bj++; 2540 } 2541 } 2542 2543 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2544 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2545 for (r = 0; r < m; ++r) { 2546 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2547 a[r] = diagA[r]; 2548 if (idx) idx[r] = cstart + diagIdx[r]; 2549 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2550 a[r] = diagA[r]; 2551 if (idx) { 2552 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2553 idx[r] = cstart + diagIdx[r]; 2554 } else idx[r] = offdiagIdx[r]; 2555 } 2556 } else { 2557 a[r] = offdiagA[r]; 2558 if (idx) idx[r] = offdiagIdx[r]; 2559 } 2560 } 2561 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2562 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2563 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2564 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2565 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2566 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2567 PetscFunctionReturn(0); 2568 } 2569 2570 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2571 { 2572 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2573 PetscInt m = A->rmap->n,n = A->cmap->n; 2574 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2575 PetscInt *cmap = mat->garray; 2576 PetscInt *diagIdx, *offdiagIdx; 2577 Vec diagV, 
offdiagV; 2578 PetscScalar *a, *diagA, *offdiagA, *ba; 2579 PetscInt r,j,col,ncols,*bi,*bj; 2580 PetscErrorCode ierr; 2581 Mat B = mat->B; 2582 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2583 2584 PetscFunctionBegin; 2585 /* When a process holds entire A and other processes have no entry */ 2586 if (A->cmap->N == n) { 2587 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2588 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2589 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2590 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2591 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2592 PetscFunctionReturn(0); 2593 } else if (n == 0) { 2594 if (m) { 2595 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2596 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2597 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2598 } 2599 PetscFunctionReturn(0); 2600 } 2601 2602 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2603 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2604 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2605 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2606 2607 /* Get offdiagIdx[] for implicit 0.0 */ 2608 ba = b->a; 2609 bi = b->i; 2610 bj = b->j; 2611 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2612 for (r = 0; r < m; r++) { 2613 ncols = bi[r+1] - bi[r]; 2614 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2615 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2616 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2617 offdiagA[r] = 0.0; 2618 2619 /* Find first hole in the cmap */ 2620 for (j=0; j<ncols; j++) { 2621 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2622 if (col > j && j < cstart) { 2623 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2624 break; 2625 } else if (col > j + n && j >= cstart) { 2626 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2627 break; 2628 } 2629 } 2630 if (j == ncols && ncols < A->cmap->N - n) { 2631 /* a hole is outside compressed Bcols */ 2632 if (ncols == 0) { 2633 if (cstart) { 2634 offdiagIdx[r] = 0; 2635 } else offdiagIdx[r] = cend; 2636 } else { /* ncols > 0 */ 2637 offdiagIdx[r] = cmap[ncols-1] + 1; 2638 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2639 } 2640 } 2641 } 2642 2643 for (j=0; j<ncols; j++) { 2644 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2645 ba++; bj++; 2646 } 2647 } 2648 2649 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2650 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2651 for (r = 0; r < m; ++r) { 2652 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2653 a[r] = diagA[r]; 2654 if (idx) idx[r] = cstart + diagIdx[r]; 2655 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2656 a[r] = diagA[r]; 2657 if (idx) { 2658 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2659 idx[r] = cstart + diagIdx[r]; 2660 } else idx[r] = offdiagIdx[r]; 2661 } 2662 } else { 2663 a[r] = offdiagA[r]; 2664 if (idx) idx[r] = offdiagIdx[r]; 2665 } 2666 } 2667 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2668 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2669 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2670 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2671 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2672 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2673 PetscFunctionReturn(0); 
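  /* MatGetRowMax_MPIAIJ above and the MatGetRowMin/MinAbs/MaxAbs variants share
     one pattern: take the extremum of the diagonal and off-diagonal blocks
     separately, locate the first implicit zero through the compressed column
     map cmap[], then merge, preferring the smaller global column index on ties.
     Illustrative caller-side sketch (not part of this source; error checking
     abbreviated):

         Vec      rmax;
         PetscInt *loc,mloc;
         MatGetLocalSize(A,&mloc,NULL);
         MatCreateVecs(A,NULL,&rmax);          left vector: one entry per local row
         PetscMalloc1(mloc,&loc);
         MatGetRowMax(A,rmax,loc);             loc[i] = global column of row i's maximum
         PetscFree(loc);
         VecDestroy(&rmax);                                                              */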
2674 } 2675 2676 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2677 { 2678 PetscErrorCode ierr; 2679 Mat *dummy; 2680 2681 PetscFunctionBegin; 2682 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2683 *newmat = *dummy; 2684 ierr = PetscFree(dummy);CHKERRQ(ierr); 2685 PetscFunctionReturn(0); 2686 } 2687 2688 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2689 { 2690 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2691 PetscErrorCode ierr; 2692 2693 PetscFunctionBegin; 2694 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2695 A->factorerrortype = a->A->factorerrortype; 2696 PetscFunctionReturn(0); 2697 } 2698 2699 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2700 { 2701 PetscErrorCode ierr; 2702 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2703 2704 PetscFunctionBegin; 2705 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2706 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2707 if (x->assembled) { 2708 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2709 } else { 2710 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2711 } 2712 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2713 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2714 PetscFunctionReturn(0); 2715 } 2716 2717 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2718 { 2719 PetscFunctionBegin; 2720 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2721 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2722 PetscFunctionReturn(0); 2723 } 2724 2725 /*@ 2726 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2727 2728 Collective on Mat 2729 2730 Input Parameters: 2731 + A - the matrix 2732 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2733 2734 Level: advanced 2735 2736 @*/ 2737 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2738 { 2739 PetscErrorCode ierr; 2740 2741 PetscFunctionBegin; 2742 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2743 PetscFunctionReturn(0); 2744 } 2745 2746 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2747 { 2748 PetscErrorCode ierr; 2749 PetscBool sc = PETSC_FALSE,flg; 2750 2751 PetscFunctionBegin; 2752 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2753 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2754 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2755 if (flg) { 2756 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2757 } 2758 ierr = PetscOptionsTail();CHKERRQ(ierr); 2759 PetscFunctionReturn(0); 2760 } 2761 2762 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2763 { 2764 PetscErrorCode ierr; 2765 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2766 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2767 2768 PetscFunctionBegin; 2769 if (!Y->preallocated) { 2770 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2771 } else if (!aij->nz) { 2772 PetscInt nonew = 
aij->nonew; 2773 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2774 aij->nonew = nonew; 2775 } 2776 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2777 PetscFunctionReturn(0); 2778 } 2779 2780 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2781 { 2782 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2783 PetscErrorCode ierr; 2784 2785 PetscFunctionBegin; 2786 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2787 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2788 if (d) { 2789 PetscInt rstart; 2790 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2791 *d += rstart; 2792 2793 } 2794 PetscFunctionReturn(0); 2795 } 2796 2797 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2798 { 2799 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2800 PetscErrorCode ierr; 2801 2802 PetscFunctionBegin; 2803 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2804 PetscFunctionReturn(0); 2805 } 2806 2807 /* -------------------------------------------------------------------*/ 2808 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2809 MatGetRow_MPIAIJ, 2810 MatRestoreRow_MPIAIJ, 2811 MatMult_MPIAIJ, 2812 /* 4*/ MatMultAdd_MPIAIJ, 2813 MatMultTranspose_MPIAIJ, 2814 MatMultTransposeAdd_MPIAIJ, 2815 NULL, 2816 NULL, 2817 NULL, 2818 /*10*/ NULL, 2819 NULL, 2820 NULL, 2821 MatSOR_MPIAIJ, 2822 MatTranspose_MPIAIJ, 2823 /*15*/ MatGetInfo_MPIAIJ, 2824 MatEqual_MPIAIJ, 2825 MatGetDiagonal_MPIAIJ, 2826 MatDiagonalScale_MPIAIJ, 2827 MatNorm_MPIAIJ, 2828 /*20*/ MatAssemblyBegin_MPIAIJ, 2829 MatAssemblyEnd_MPIAIJ, 2830 MatSetOption_MPIAIJ, 2831 MatZeroEntries_MPIAIJ, 2832 /*24*/ MatZeroRows_MPIAIJ, 2833 NULL, 2834 NULL, 2835 NULL, 2836 NULL, 2837 /*29*/ MatSetUp_MPIAIJ, 2838 NULL, 2839 NULL, 2840 MatGetDiagonalBlock_MPIAIJ, 2841 NULL, 2842 /*34*/ MatDuplicate_MPIAIJ, 2843 NULL, 2844 NULL, 2845 NULL, 2846 NULL, 2847 /*39*/ MatAXPY_MPIAIJ, 2848 MatCreateSubMatrices_MPIAIJ, 2849 MatIncreaseOverlap_MPIAIJ, 2850 MatGetValues_MPIAIJ, 2851 MatCopy_MPIAIJ, 2852 /*44*/ MatGetRowMax_MPIAIJ, 2853 MatScale_MPIAIJ, 2854 MatShift_MPIAIJ, 2855 MatDiagonalSet_MPIAIJ, 2856 MatZeroRowsColumns_MPIAIJ, 2857 /*49*/ MatSetRandom_MPIAIJ, 2858 NULL, 2859 NULL, 2860 NULL, 2861 NULL, 2862 /*54*/ MatFDColoringCreate_MPIXAIJ, 2863 NULL, 2864 MatSetUnfactored_MPIAIJ, 2865 MatPermute_MPIAIJ, 2866 NULL, 2867 /*59*/ MatCreateSubMatrix_MPIAIJ, 2868 MatDestroy_MPIAIJ, 2869 MatView_MPIAIJ, 2870 NULL, 2871 NULL, 2872 /*64*/ NULL, 2873 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2874 NULL, 2875 NULL, 2876 NULL, 2877 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2878 MatGetRowMinAbs_MPIAIJ, 2879 NULL, 2880 NULL, 2881 NULL, 2882 NULL, 2883 /*75*/ MatFDColoringApply_AIJ, 2884 MatSetFromOptions_MPIAIJ, 2885 NULL, 2886 NULL, 2887 MatFindZeroDiagonals_MPIAIJ, 2888 /*80*/ NULL, 2889 NULL, 2890 NULL, 2891 /*83*/ MatLoad_MPIAIJ, 2892 MatIsSymmetric_MPIAIJ, 2893 NULL, 2894 NULL, 2895 NULL, 2896 NULL, 2897 /*89*/ NULL, 2898 NULL, 2899 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2900 NULL, 2901 NULL, 2902 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2903 NULL, 2904 NULL, 2905 NULL, 2906 MatBindToCPU_MPIAIJ, 2907 /*99*/ MatProductSetFromOptions_MPIAIJ, 2908 NULL, 2909 NULL, 2910 MatConjugate_MPIAIJ, 2911 NULL, 2912 /*104*/MatSetValuesRow_MPIAIJ, 2913 MatRealPart_MPIAIJ, 2914 MatImaginaryPart_MPIAIJ, 2915 NULL, 2916 NULL, 2917 /*109*/NULL, 2918 NULL, 2919 MatGetRowMin_MPIAIJ, 2920 NULL, 2921 
MatMissingDiagonal_MPIAIJ, 2922 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2923 NULL, 2924 MatGetGhosts_MPIAIJ, 2925 NULL, 2926 NULL, 2927 /*119*/MatMultDiagonalBlock_MPIAIJ, 2928 NULL, 2929 NULL, 2930 NULL, 2931 MatGetMultiProcBlock_MPIAIJ, 2932 /*124*/MatFindNonzeroRows_MPIAIJ, 2933 MatGetColumnNorms_MPIAIJ, 2934 MatInvertBlockDiagonal_MPIAIJ, 2935 MatInvertVariableBlockDiagonal_MPIAIJ, 2936 MatCreateSubMatricesMPI_MPIAIJ, 2937 /*129*/NULL, 2938 NULL, 2939 NULL, 2940 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2941 NULL, 2942 /*134*/NULL, 2943 NULL, 2944 NULL, 2945 NULL, 2946 NULL, 2947 /*139*/MatSetBlockSizes_MPIAIJ, 2948 NULL, 2949 NULL, 2950 MatFDColoringSetUp_MPIXAIJ, 2951 MatFindOffBlockDiagonalEntries_MPIAIJ, 2952 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2953 /*145*/NULL, 2954 NULL, 2955 NULL 2956 }; 2957 2958 /* ----------------------------------------------------------------------------------------*/ 2959 2960 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2961 { 2962 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2963 PetscErrorCode ierr; 2964 2965 PetscFunctionBegin; 2966 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2967 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2968 PetscFunctionReturn(0); 2969 } 2970 2971 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2972 { 2973 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2974 PetscErrorCode ierr; 2975 2976 PetscFunctionBegin; 2977 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2978 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2979 PetscFunctionReturn(0); 2980 } 2981 2982 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2983 { 2984 Mat_MPIAIJ *b; 2985 PetscErrorCode ierr; 2986 PetscMPIInt size; 2987 2988 PetscFunctionBegin; 2989 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2990 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2991 b = (Mat_MPIAIJ*)B->data; 2992 2993 #if defined(PETSC_USE_CTABLE) 2994 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2995 #else 2996 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2997 #endif 2998 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2999 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 3000 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 3001 3002 /* Because the B will have been resized we simply destroy it and create a new one each time */ 3003 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3004 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3005 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3006 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 3007 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3008 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3009 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3010 3011 if (!B->preallocated) { 3012 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3013 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3014 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3015 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3016 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3017 } 3018 3019 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3020 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3021 B->preallocated = PETSC_TRUE; 3022 B->was_assembled = PETSC_FALSE; 3023 B->assembled = PETSC_FALSE; 3024 PetscFunctionReturn(0); 3025 } 3026 3027 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 3028 { 3029 Mat_MPIAIJ *b; 3030 PetscErrorCode ierr; 3031 3032 PetscFunctionBegin; 3033 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3034 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3035 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3036 b = (Mat_MPIAIJ*)B->data; 3037 3038 #if defined(PETSC_USE_CTABLE) 3039 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 3040 #else 3041 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 3042 #endif 3043 ierr = PetscFree(b->garray);CHKERRQ(ierr); 3044 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 3045 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 3046 3047 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 3048 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 3049 B->preallocated = PETSC_TRUE; 3050 B->was_assembled = PETSC_FALSE; 3051 B->assembled = PETSC_FALSE; 3052 PetscFunctionReturn(0); 3053 } 3054 3055 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3056 { 3057 Mat mat; 3058 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3059 PetscErrorCode ierr; 3060 3061 PetscFunctionBegin; 3062 *newmat = NULL; 3063 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3064 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3065 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3066 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3067 a = (Mat_MPIAIJ*)mat->data; 3068 3069 mat->factortype = matin->factortype; 3070 mat->assembled = matin->assembled; 3071 mat->insertmode = NOT_SET_VALUES; 3072 mat->preallocated = matin->preallocated; 3073 3074 a->size = oldmat->size; 3075 a->rank = oldmat->rank; 3076 a->donotstash = oldmat->donotstash; 3077 a->roworiented = oldmat->roworiented; 3078 a->rowindices = NULL; 3079 a->rowvalues = NULL; 3080 a->getrowactive = PETSC_FALSE; 3081 3082 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3083 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3084 3085 if (oldmat->colmap) { 3086 #if defined(PETSC_USE_CTABLE) 3087 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3088 #else 3089 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 3090 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3091 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 3092 #endif 3093 } else a->colmap = NULL; 3094 if (oldmat->garray) { 3095 PetscInt len; 3096 len = oldmat->B->cmap->n; 3097 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 3098 
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3099 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 3100 } else a->garray = NULL; 3101 3102 /* It may happen MatDuplicate is called with a non-assembled matrix 3103 In fact, MatDuplicate only requires the matrix to be preallocated 3104 This may happen inside a DMCreateMatrix_Shell */ 3105 if (oldmat->lvec) { 3106 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3107 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3108 } 3109 if (oldmat->Mvctx) { 3110 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3111 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3112 } 3113 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3114 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3115 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3116 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3117 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3118 *newmat = mat; 3119 PetscFunctionReturn(0); 3120 } 3121 3122 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3123 { 3124 PetscBool isbinary, ishdf5; 3125 PetscErrorCode ierr; 3126 3127 PetscFunctionBegin; 3128 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3129 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3130 /* force binary viewer to load .info file if it has not yet done so */ 3131 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3132 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3133 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3134 if (isbinary) { 3135 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3136 } else if (ishdf5) { 3137 #if defined(PETSC_HAVE_HDF5) 3138 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3139 #else 3140 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3141 #endif 3142 } else { 3143 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3144 } 3145 PetscFunctionReturn(0); 3146 } 3147 3148 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3149 { 3150 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3151 PetscInt *rowidxs,*colidxs; 3152 PetscScalar *matvals; 3153 PetscErrorCode ierr; 3154 3155 PetscFunctionBegin; 3156 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3157 3158 /* read in matrix header */ 3159 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3160 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3161 M = header[1]; N = header[2]; nz = header[3]; 3162 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3163 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3164 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3165 3166 /* set block sizes from the viewer's .info file */ 3167 ierr = 
MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3168 /* set global sizes if not set already */ 3169 if (mat->rmap->N < 0) mat->rmap->N = M; 3170 if (mat->cmap->N < 0) mat->cmap->N = N; 3171 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3172 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3173 3174 /* check if the matrix sizes are correct */ 3175 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3176 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3177 3178 /* read in row lengths and build row indices */ 3179 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3180 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3181 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3182 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3183 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3184 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3185 /* read in column indices and matrix values */ 3186 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3187 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3188 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3189 /* store matrix indices and values */ 3190 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3191 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3192 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3193 PetscFunctionReturn(0); 3194 } 3195 3196 /* Not scalable because of ISAllGather() unless getting all columns. 
*/
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns */
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an index set with the global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of the local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of the local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameters:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates the global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.'
to form a full vector x */ 3268 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3269 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3270 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3271 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3272 3273 /* Get start indices */ 3274 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3275 isstart -= ncols; 3276 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3277 3278 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3279 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3280 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3281 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3282 for (i=0; i<ncols; i++) { 3283 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3284 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3285 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3286 } 3287 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3288 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3289 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3290 3291 /* Get iscol_d */ 3292 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3293 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3294 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3295 3296 /* Get isrow_d */ 3297 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3298 rstart = mat->rmap->rstart; 3299 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3300 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3301 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3302 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3303 3304 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3305 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3306 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3307 3308 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3309 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3310 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3311 3312 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3313 3314 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3315 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3316 3317 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3318 /* off-process column indices */ 3319 count = 0; 3320 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3321 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3322 3323 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3324 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3325 for (i=0; i<Bn; i++) { 3326 if (PetscRealPart(xarray[i]) > -1.0) { 3327 idx[count] = i; /* local column index in off-diagonal part B */ 3328 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3329 count++; 3330 } 3331 } 3332 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3333 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3334 3335 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3336 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3337 3338 ierr = PetscFree(idx);CHKERRQ(ierr); 3339 *garray = cmap1; 3340 3341 ierr = VecDestroy(&x);CHKERRQ(ierr); 3342 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3343 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3344 PetscFunctionReturn(0); 3345 } 3346 3347 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3348 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3349 { 3350 PetscErrorCode ierr; 3351 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3352 Mat M = NULL; 3353 MPI_Comm comm; 3354 IS iscol_d,isrow_d,iscol_o; 3355 Mat Asub = NULL,Bsub = NULL; 3356 PetscInt n; 3357 3358 PetscFunctionBegin; 3359 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3360 3361 if (call == MAT_REUSE_MATRIX) { 3362 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3363 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3364 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3365 3366 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3367 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3368 3369 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3370 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3371 3372 /* Update diagonal and off-diagonal portions of submat */ 3373 asub = (Mat_MPIAIJ*)(*submat)->data; 3374 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3375 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3376 if (n) { 3377 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3378 } 3379 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3380 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3381 3382 } else { /* call == MAT_INITIAL_MATRIX) */ 3383 const PetscInt *garray; 3384 PetscInt BsubN; 3385 3386 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/
    ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);

    /* Create local submatrices Asub and Bsub */
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);

    /* Create submatrix M */
    ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);

      ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
      j    = 0;
      ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
      }
      ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);

      ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);

    } else if (BsubN < n) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than columns of B %D",BsubN,asub->B->cmap->N);
    }

    ierr = PetscFree(garray);CHKERRQ(ierr);
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o on this process for the next (reuse) request */
    ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has the same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with the global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE;  /* sameColDist */
    } else {
      ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE;  /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has the same processor distribution as mat */
    sameDist[0] =
PETSC_FALSE; 3470 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3471 if (!n) { 3472 sameDist[0] = PETSC_TRUE; 3473 } else { 3474 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3475 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3476 if (i >= start && j < end) { 3477 sameDist[0] = PETSC_TRUE; 3478 } 3479 } 3480 3481 /* Check if iscol has same processor distribution as mat */ 3482 sameDist[1] = PETSC_FALSE; 3483 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3484 if (!n) { 3485 sameDist[1] = PETSC_TRUE; 3486 } else { 3487 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3488 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3489 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3490 } 3491 3492 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3493 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3494 sameRowDist = tsameDist[0]; 3495 } 3496 3497 if (sameRowDist) { 3498 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3499 /* isrow and iscol have same processor distribution as mat */ 3500 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3501 PetscFunctionReturn(0); 3502 } else { /* sameRowDist */ 3503 /* isrow has same processor distribution as mat */ 3504 if (call == MAT_INITIAL_MATRIX) { 3505 PetscBool sorted; 3506 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3507 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3508 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3509 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3510 3511 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3512 if (sorted) { 3513 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3514 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3515 PetscFunctionReturn(0); 3516 } 3517 } else { /* call == MAT_REUSE_MATRIX */ 3518 IS iscol_sub; 3519 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3520 if (iscol_sub) { 3521 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3522 PetscFunctionReturn(0); 3523 } 3524 } 3525 } 3526 } 3527 3528 /* General case: iscol -> iscol_local which has global size of iscol */ 3529 if (call == MAT_REUSE_MATRIX) { 3530 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3531 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3532 } else { 3533 if (!iscol_local) { 3534 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3535 } 3536 } 3537 3538 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3539 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3540 3541 if (call == MAT_INITIAL_MATRIX) { 3542 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3543 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3544 } 3545 PetscFunctionReturn(0); 3546 } 3547 3548 /*@C 3549 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3550 and "off-diagonal" part of the matrix in CSR format. 3551 3552 Collective 3553 3554 Input Parameters: 3555 + comm - MPI communicator 3556 . 
A - "diagonal" portion of matrix 3557 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3558 - garray - global index of B columns 3559 3560 Output Parameter: 3561 . mat - the matrix, with input A as its local diagonal matrix 3562 Level: advanced 3563 3564 Notes: 3565 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3566 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3567 3568 .seealso: MatCreateMPIAIJWithSplitArrays() 3569 @*/ 3570 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3571 { 3572 PetscErrorCode ierr; 3573 Mat_MPIAIJ *maij; 3574 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3575 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3576 PetscScalar *oa=b->a; 3577 Mat Bnew; 3578 PetscInt m,n,N; 3579 3580 PetscFunctionBegin; 3581 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3582 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3583 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3584 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3585 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3586 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3587 3588 /* Get global columns of mat */ 3589 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3590 3591 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3592 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3593 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3594 maij = (Mat_MPIAIJ*)(*mat)->data; 3595 3596 (*mat)->preallocated = PETSC_TRUE; 3597 3598 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3599 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3600 3601 /* Set A as diagonal portion of *mat */ 3602 maij->A = A; 3603 3604 nz = oi[m]; 3605 for (i=0; i<nz; i++) { 3606 col = oj[i]; 3607 oj[i] = garray[col]; 3608 } 3609 3610 /* Set Bnew as off-diagonal portion of *mat */ 3611 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3612 bnew = (Mat_SeqAIJ*)Bnew->data; 3613 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3614 maij->B = Bnew; 3615 3616 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3617 3618 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3619 b->free_a = PETSC_FALSE; 3620 b->free_ij = PETSC_FALSE; 3621 ierr = MatDestroy(&B);CHKERRQ(ierr); 3622 3623 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3624 bnew->free_a = PETSC_TRUE; 3625 bnew->free_ij = PETSC_TRUE; 3626 3627 /* condense columns of maij->B */ 3628 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3629 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3630 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3631 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3632 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3633 PetscFunctionReturn(0); 3634 } 3635 3636 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3637 
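/*
   A hedged usage sketch (not part of the library): the calling sequence that
   MatCreateSubMatrix_MPIAIJ_SameRowColDist() above uses to drive MatCreateMPIAIJWithSeqAIJ().
   Asub, Bsub and garray are assumed to come from MatCreateSubMatrix_SeqAIJ() and
   ISGetSeqIS_SameColDist_Private():

     Mat            Asub,Bsub,M;
     const PetscInt *garray;
     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

   After the call, Asub and Bsub belong to M and must not be used or destroyed by the caller.
*/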
3638 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3639 { 3640 PetscErrorCode ierr; 3641 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3642 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3643 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3644 Mat M,Msub,B=a->B; 3645 MatScalar *aa; 3646 Mat_SeqAIJ *aij; 3647 PetscInt *garray = a->garray,*colsub,Ncols; 3648 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3649 IS iscol_sub,iscmap; 3650 const PetscInt *is_idx,*cmap; 3651 PetscBool allcolumns=PETSC_FALSE; 3652 MPI_Comm comm; 3653 3654 PetscFunctionBegin; 3655 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3656 3657 if (call == MAT_REUSE_MATRIX) { 3658 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3659 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3660 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3661 3662 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3663 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3664 3665 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3666 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3667 3668 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3669 3670 } else { /* call == MAT_INITIAL_MATRIX) */ 3671 PetscBool flg; 3672 3673 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3674 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3675 3676 /* (1) iscol -> nonscalable iscol_local */ 3677 /* Check for special case: each processor gets entire matrix columns */ 3678 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3679 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3680 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3681 if (allcolumns) { 3682 iscol_sub = iscol_local; 3683 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3684 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3685 3686 } else { 3687 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3688 PetscInt *idx,*cmap1,k; 3689 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3690 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3691 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3692 count = 0; 3693 k = 0; 3694 for (i=0; i<Ncols; i++) { 3695 j = is_idx[i]; 3696 if (j >= cstart && j < cend) { 3697 /* diagonal part of mat */ 3698 idx[count] = j; 3699 cmap1[count++] = i; /* column index in submat */ 3700 } else if (Bn) { 3701 /* off-diagonal part of mat */ 3702 if (j == garray[k]) { 3703 idx[count] = j; 3704 cmap1[count++] = i; /* column index in submat */ 3705 } else if (j > garray[k]) { 3706 while (j > garray[k] && k < Bn-1) k++; 3707 if (j == garray[k]) { 3708 idx[count] = j; 3709 cmap1[count++] = i; /* column index in submat */ 3710 } 3711 } 3712 } 3713 } 3714 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3715 3716 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3717 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3718 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3719 3720 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3721 } 3722 3723 /* (3) Create sequential Msub */ 3724 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3725 } 3726 3727 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3728 aij = (Mat_SeqAIJ*)(Msub)->data; 3729 ii = aij->i; 3730 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3731 3732 /* 3733 m - number of local rows 3734 Ncols - number of columns (same on all processors) 3735 rstart - first row in new global matrix generated 3736 */ 3737 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3738 3739 if (call == MAT_INITIAL_MATRIX) { 3740 /* (4) Create parallel newmat */ 3741 PetscMPIInt rank,size; 3742 PetscInt csize; 3743 3744 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3745 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3746 3747 /* 3748 Determine the number of non-zeros in the diagonal and off-diagonal 3749 portions of the matrix in order to do correct preallocation 3750 */ 3751 3752 /* first get start and end of "diagonal" columns */ 3753 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3754 if (csize == PETSC_DECIDE) { 3755 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3756 if (mglobal == Ncols) { /* square matrix */ 3757 nlocal = m; 3758 } else { 3759 nlocal = Ncols/size + ((Ncols % size) > rank); 3760 } 3761 } else { 3762 nlocal = csize; 3763 } 3764 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3765 rstart = rend - nlocal; 3766 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3767 3768 /* next, compute all the lengths */ 3769 jj = aij->j; 3770 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3771 olens = dlens + m; 3772 for (i=0; i<m; i++) { 3773 jend = ii[i+1] - ii[i]; 3774 olen = 0; 3775 dlen = 0; 3776 for (j=0; j<jend; j++) { 3777 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3778 else dlen++; 3779 jj++; 3780 } 3781 olens[i] = olen; 3782 dlens[i] = dlen; 3783 } 3784 3785 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3786 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3787 3788 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3789 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
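    /* M takes the block sizes of isrow/iscol and the type of mat, and is preallocated
       exactly from the per-row diagonal/off-diagonal lengths (dlens/olens) computed above */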
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap on this process for the next (reuse) request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
  Not great since it makes two copies of the submatrix: first a sequential SeqAIJ on each
  process, and then the end result built by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().

  Note: This requires a sequential iscol with all indices.
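  A typical caller is MatCreateSubMatrix_MPIAIJ() above, which builds such an iscol
  with ISGetSeqIS_Private() (an ISAllGather() in the general case).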
3854 */ 3855 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3856 { 3857 PetscErrorCode ierr; 3858 PetscMPIInt rank,size; 3859 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3860 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3861 Mat M,Mreuse; 3862 MatScalar *aa,*vwork; 3863 MPI_Comm comm; 3864 Mat_SeqAIJ *aij; 3865 PetscBool colflag,allcolumns=PETSC_FALSE; 3866 3867 PetscFunctionBegin; 3868 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3869 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3870 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3871 3872 /* Check for special case: each processor gets entire matrix columns */ 3873 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3874 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3875 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3876 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3877 3878 if (call == MAT_REUSE_MATRIX) { 3879 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3880 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3881 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3882 } else { 3883 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3884 } 3885 3886 /* 3887 m - number of local rows 3888 n - number of columns (same on all processors) 3889 rstart - first row in new global matrix generated 3890 */ 3891 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3892 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3893 if (call == MAT_INITIAL_MATRIX) { 3894 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3895 ii = aij->i; 3896 jj = aij->j; 3897 3898 /* 3899 Determine the number of non-zeros in the diagonal and off-diagonal 3900 portions of the matrix in order to do correct preallocation 3901 */ 3902 3903 /* first get start and end of "diagonal" columns */ 3904 if (csize == PETSC_DECIDE) { 3905 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3906 if (mglobal == n) { /* square matrix */ 3907 nlocal = m; 3908 } else { 3909 nlocal = n/size + ((n % size) > rank); 3910 } 3911 } else { 3912 nlocal = csize; 3913 } 3914 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3915 rstart = rend - nlocal; 3916 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3917 3918 /* next, compute all the lengths */ 3919 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3920 olens = dlens + m; 3921 for (i=0; i<m; i++) { 3922 jend = ii[i+1] - ii[i]; 3923 olen = 0; 3924 dlen = 0; 3925 for (j=0; j<jend; j++) { 3926 if (*jj < rstart || *jj >= rend) olen++; 3927 else dlen++; 3928 jj++; 3929 } 3930 olens[i] = olen; 3931 dlens[i] = dlen; 3932 } 3933 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3934 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3935 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3936 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3937 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3938 ierr = PetscFree(dlens);CHKERRQ(ierr); 3939 } else { 3940 PetscInt ml,nl; 3941 3942 M = *newmat; 3943 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3944 if (ml 
!= m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix on this process for the next (reuse) request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

  if (PetscDefined(USE_DEBUG)) {
    for (i=0; i<m; i++) {
      nnz = Ii[i+1] - Ii[i];
      JJ  = J + Ii[i];
      if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
      if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
      if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
    }
  }

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i],v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
  }
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i values are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering; i.e., for the following matrix, the input data
    expected is as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2; i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
   rows in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i values are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering;
    i.e., for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
   rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical.

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.  J - column indices
-  v - matrix values

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscErrorCode ierr;
  PetscInt       cstart,nnz,i,j;
  PetscInt       *ld;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data;
  PetscScalar    *ad  = Ad->a, *ao = Ao->a;
  const PetscInt *Adi = Ad->i;
  PetscInt       ldi,Iii,md;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  cstart = mat->cmap->rstart;
  if (!Aij->ld) {
    /* count number of entries below block diagonal */
    ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
    Aij->ld = ld;
    for (i=0; i<m; i++) {
      nnz = Ii[i+1] - Ii[i];
      j   = 0;
      while (j < nnz && J[j] < cstart) j++; /* test j < nnz first so we never read past this row */
      J    += nnz;
      ld[i] = j;
    }
  } else {
    ld = Aij->ld;
  }

  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i+1] - Adi[i];
    ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
    ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
    ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
    ad += md;
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the values of the input parameter 'm' on each process; i.e.,
   each processor stores values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the values of the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the m rows and n columns owned
   by the given processor; i.e., the diagonal submatrix on
   process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix, [m x (N-n)],
   constitutes the OFF-DIAGONAL portion. The example below
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   per row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz and o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations on every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2; i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is exact.
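
   As an illustrative sketch only (the other ranks differ in m and in the
   d_nnz/o_nnz arrays, as listed above), proc0 could create its share of the
   example matrix with exact preallocation as

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   /* per-row counts on proc0, from the table above */
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* then MatSetValues() followed by MatAssemblyBegin()/MatAssemblyEnd() with MAT_FINAL_ASSEMBLY */
.ve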
   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

   Not collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
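
   A minimal usage sketch:

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     /* local column j of Ao corresponds to global column colmap[j] of A */
.ve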
4587 4588 Level: intermediate 4589 4590 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4591 @*/ 4592 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4593 { 4594 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4595 PetscBool flg; 4596 PetscErrorCode ierr; 4597 4598 PetscFunctionBegin; 4599 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4600 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4601 if (Ad) *Ad = a->A; 4602 if (Ao) *Ao = a->B; 4603 if (colmap) *colmap = a->garray; 4604 PetscFunctionReturn(0); 4605 } 4606 4607 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4608 { 4609 PetscErrorCode ierr; 4610 PetscInt m,N,i,rstart,nnz,Ii; 4611 PetscInt *indx; 4612 PetscScalar *values; 4613 4614 PetscFunctionBegin; 4615 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4616 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4617 PetscInt *dnz,*onz,sum,bs,cbs; 4618 4619 if (n == PETSC_DECIDE) { 4620 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4621 } 4622 /* Check sum(n) = N */ 4623 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4624 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4625 4626 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4627 rstart -= m; 4628 4629 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4630 for (i=0; i<m; i++) { 4631 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4632 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4633 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4634 } 4635 4636 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4637 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4638 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4639 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4640 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4641 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4642 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4643 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4644 } 4645 4646 /* numeric phase */ 4647 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4648 for (i=0; i<m; i++) { 4649 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4650 Ii = i + rstart; 4651 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4652 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4653 } 4654 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4655 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4656 PetscFunctionReturn(0); 4657 } 4658 4659 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4660 { 4661 PetscErrorCode ierr; 4662 PetscMPIInt rank; 4663 PetscInt m,N,i,rstart,nnz; 4664 size_t len; 4665 const PetscInt *indx; 4666 PetscViewer out; 4667 char *name; 4668 Mat B; 4669 const PetscScalar *values; 4670 4671 PetscFunctionBegin; 4672 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4673 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4674 /* Should this be the type of the diagonal block of A? 
*/ 4675 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4676 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4677 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4678 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4679 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4680 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4681 for (i=0; i<m; i++) { 4682 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4683 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4684 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4685 } 4686 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4687 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4688 4689 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4690 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4691 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4692 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4693 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4694 ierr = PetscFree(name);CHKERRQ(ierr); 4695 ierr = MatView(B,out);CHKERRQ(ierr); 4696 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4697 ierr = MatDestroy(&B);CHKERRQ(ierr); 4698 PetscFunctionReturn(0); 4699 } 4700 4701 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4702 { 4703 PetscErrorCode ierr; 4704 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4705 4706 PetscFunctionBegin; 4707 if (!merge) PetscFunctionReturn(0); 4708 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4709 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4710 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4711 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4712 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4713 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4714 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4715 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4716 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4717 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4718 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4719 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4720 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4721 ierr = PetscFree(merge);CHKERRQ(ierr); 4722 PetscFunctionReturn(0); 4723 } 4724 4725 #include <../src/mat/utils/freespace.h> 4726 #include <petscbt.h> 4727 4728 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4729 { 4730 PetscErrorCode ierr; 4731 MPI_Comm comm; 4732 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4733 PetscMPIInt size,rank,taga,*len_s; 4734 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4735 PetscInt proc,m; 4736 PetscInt **buf_ri,**buf_rj; 4737 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4738 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4739 MPI_Request *s_waits,*r_waits; 4740 MPI_Status *status; 4741 MatScalar *aa=a->a; 4742 MatScalar **abuf_r,*ba_i; 4743 Mat_Merge_SeqsToMPI *merge; 4744 PetscContainer container; 4745 4746 PetscFunctionBegin; 4747 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4748 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4749 4750 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4751 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4752 4753 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4754 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4755 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4756 4757 bi = merge->bi; 4758 bj = merge->bj; 4759 buf_ri = merge->buf_ri; 4760 buf_rj = merge->buf_rj; 4761 4762 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4763 owners = merge->rowmap->range; 4764 len_s = merge->len_s; 4765 4766 /* send and recv matrix values */ 4767 /*-----------------------------*/ 4768 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4769 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4770 4771 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4772 for (proc=0,k=0; proc<size; proc++) { 4773 if (!len_s[proc]) continue; 4774 i = owners[proc]; 4775 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4776 k++; 4777 } 4778 4779 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4780 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4781 ierr = PetscFree(status);CHKERRQ(ierr); 4782 4783 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4784 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4785 4786 /* insert mat values of mpimat */ 4787 /*----------------------------*/ 4788 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4789 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4790 4791 for (k=0; k<merge->nrecv; k++) { 4792 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4793 nrows = *(buf_ri_k[k]); 4794 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4795 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4796 } 4797 4798 /* set values of ba */ 4799 m = merge->rowmap->n; 4800 for (i=0; i<m; i++) { 4801 arow = owners[rank] + i; 4802 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4803 bnzi = bi[i+1] - bi[i]; 4804 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4805 4806 /* add local non-zero vals of this proc's seqmat into ba */ 4807 anzi = ai[arow+1] - ai[arow]; 4808 aj = a->j + ai[arow]; 4809 aa = a->a + ai[arow]; 4810 nextaj = 0; 4811 for (j=0; nextaj<anzi; j++) { 4812 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4813 ba_i[j] += aa[nextaj++]; 4814 } 4815 } 4816 4817 /* add received vals into ba */ 4818 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4819 /* i-th row */ 4820 if (i == *nextrow[k]) { 4821 anzi = *(nextai[k]+1) - *nextai[k]; 4822 aj = buf_rj[k] + *(nextai[k]); 4823 aa = abuf_r[k] + *(nextai[k]); 4824 nextaj = 0; 4825 for (j=0; nextaj<anzi; j++) { 4826 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4827 ba_i[j] += aa[nextaj++]; 4828 } 4829 } 4830 nextrow[k]++; nextai[k]++; 4831 } 4832 } 4833 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4834 } 4835 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4836 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4837 4838 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4839 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4840 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4841 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4842 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4843 PetscFunctionReturn(0); 4844 } 4845 4846 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4847 { 4848 PetscErrorCode ierr; 4849 Mat B_mpi; 4850 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4851 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4852 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4853 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4854 PetscInt len,proc,*dnz,*onz,bs,cbs; 4855 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4856 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4857 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4858 MPI_Status *status; 4859 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4860 PetscBT lnkbt; 4861 Mat_Merge_SeqsToMPI *merge; 4862 PetscContainer container; 4863 4864 PetscFunctionBegin; 4865 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4866 4867 /* make sure it is a PETSc comm */ 4868 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4869 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4870 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4871 4872 ierr = PetscNew(&merge);CHKERRQ(ierr); 4873 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4874 4875 /* determine row ownership */ 4876 /*---------------------------------------------------------*/ 4877 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4878 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4879 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4880 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4881 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4882 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4883 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4884 4885 m = merge->rowmap->n; 4886 owners = merge->rowmap->range; 4887 4888 /* determine the number of messages to send, their lengths */ 4889 /*---------------------------------------------------------*/ 4890 len_s = merge->len_s; 4891 4892 len = 0; /* length of buf_si[] */ 4893 merge->nsend = 0; 4894 for (proc=0; proc<size; proc++) { 4895 len_si[proc] = 0; 4896 if (proc == rank) { 4897 len_s[proc] = 0; 4898 } else { 4899 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4900 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4901 } 4902 if (len_s[proc]) { 4903 merge->nsend++; 4904 nrows = 0; 4905 for (i=owners[proc]; i<owners[proc+1]; i++) { 4906 if (ai[i+1] > ai[i]) nrows++; 4907 } 4908 len_si[proc] = 2*(nrows+1); 4909 len += len_si[proc]; 4910 } 4911 } 4912 4913 /* determine the number and length of messages to receive for ij-structure */ 4914 /*-------------------------------------------------------------------------*/ 4915 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4916 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4917 4918 /* post the Irecv of j-structure */ 4919 /*-------------------------------*/ 4920 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4921 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4922 4923 /* post the Isend of j-structure */ 4924 /*--------------------------------*/ 4925 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4926 4927 for (proc=0, k=0; proc<size; proc++) { 4928 if (!len_s[proc]) continue; 4929 i = owners[proc]; 4930 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4931 k++; 4932 } 4933 4934 /* receives and sends of j-structure are complete */ 4935 /*------------------------------------------------*/ 
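  /* Note: the MPI_Waitall() calls below guarantee that the buf_rj[] buffers posted by
     PetscPostIrecvInt() have been fully received and that the outgoing aj segments
     handed to MPI_Isend() may be reused */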
4936 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4937 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4938 4939 /* send and recv i-structure */ 4940 /*---------------------------*/ 4941 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4942 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4943 4944 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4945 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4946 for (proc=0,k=0; proc<size; proc++) { 4947 if (!len_s[proc]) continue; 4948 /* form outgoing message for i-structure: 4949 buf_si[0]: nrows to be sent 4950 [1:nrows]: row index (global) 4951 [nrows+1:2*nrows+1]: i-structure index 4952 */ 4953 /*-------------------------------------------*/ 4954 nrows = len_si[proc]/2 - 1; 4955 buf_si_i = buf_si + nrows+1; 4956 buf_si[0] = nrows; 4957 buf_si_i[0] = 0; 4958 nrows = 0; 4959 for (i=owners[proc]; i<owners[proc+1]; i++) { 4960 anzi = ai[i+1] - ai[i]; 4961 if (anzi) { 4962 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4963 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4964 nrows++; 4965 } 4966 } 4967 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4968 k++; 4969 buf_si += len_si[proc]; 4970 } 4971 4972 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4973 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4974 4975 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4976 for (i=0; i<merge->nrecv; i++) { 4977 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4978 } 4979 4980 ierr = PetscFree(len_si);CHKERRQ(ierr); 4981 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4982 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4983 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4984 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4985 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4986 ierr = PetscFree(status);CHKERRQ(ierr); 4987 4988 /* compute a local seq matrix in each processor */ 4989 /*----------------------------------------------*/ 4990 /* allocate bi array and free space for accumulating nonzero column info */ 4991 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4992 bi[0] = 0; 4993 4994 /* create and initialize a linked list */ 4995 nlnk = N+1; 4996 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4997 4998 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4999 len = ai[owners[rank+1]] - ai[owners[rank]]; 5000 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 5001 5002 current_space = free_space; 5003 5004 /* determine symbolic info for each local row */ 5005 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 5006 5007 for (k=0; k<merge->nrecv; k++) { 5008 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5009 nrows = *buf_ri_k[k]; 5010 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5011 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 5012 } 5013 5014 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5015 len = 0; 5016 for (i=0; i<m; i++) { 5017 bnzi = 0; 5018 /* add local non-zero cols of this proc's seqmat into lnk */ 5019 arow = owners[rank] + i; 5020 anzi = 
ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

   Notes:
     When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
     If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
     This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
     modify the values of the returned A_loc.
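
   A typical calling sequence, sketched:

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);  /* first call creates A_loc */
     /* ... use A_loc ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);    /* later calls only refresh the values */
     MatDestroy(&A_loc);
.ve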
5167 5168 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5169 5170 @*/ 5171 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5172 { 5173 PetscErrorCode ierr; 5174 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5175 Mat_SeqAIJ *mat,*a,*b; 5176 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5177 MatScalar *aa,*ba,*cam; 5178 PetscScalar *ca; 5179 PetscMPIInt size; 5180 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5181 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5182 PetscBool match; 5183 5184 PetscFunctionBegin; 5185 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5186 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5187 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 5188 if (size == 1) { 5189 if (scall == MAT_INITIAL_MATRIX) { 5190 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5191 *A_loc = mpimat->A; 5192 } else if (scall == MAT_REUSE_MATRIX) { 5193 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5194 } 5195 PetscFunctionReturn(0); 5196 } 5197 5198 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5199 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5200 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5201 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5202 aa = a->a; ba = b->a; 5203 if (scall == MAT_INITIAL_MATRIX) { 5204 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5205 ci[0] = 0; 5206 for (i=0; i<am; i++) { 5207 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5208 } 5209 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5210 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5211 k = 0; 5212 for (i=0; i<am; i++) { 5213 ncols_o = bi[i+1] - bi[i]; 5214 ncols_d = ai[i+1] - ai[i]; 5215 /* off-diagonal portion of A */ 5216 for (jo=0; jo<ncols_o; jo++) { 5217 col = cmap[*bj]; 5218 if (col >= cstart) break; 5219 cj[k] = col; bj++; 5220 ca[k++] = *ba++; 5221 } 5222 /* diagonal portion of A */ 5223 for (j=0; j<ncols_d; j++) { 5224 cj[k] = cstart + *aj++; 5225 ca[k++] = *aa++; 5226 } 5227 /* off-diagonal portion of A */ 5228 for (j=jo; j<ncols_o; j++) { 5229 cj[k] = cmap[*bj++]; 5230 ca[k++] = *ba++; 5231 } 5232 } 5233 /* put together the new matrix */ 5234 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5235 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5236 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5237 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5238 mat->free_a = PETSC_TRUE; 5239 mat->free_ij = PETSC_TRUE; 5240 mat->nonew = 0; 5241 } else if (scall == MAT_REUSE_MATRIX) { 5242 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5243 ci = mat->i; cj = mat->j; cam = mat->a; 5244 for (i=0; i<am; i++) { 5245 /* off-diagonal portion of A */ 5246 ncols_o = bi[i+1] - bi[i]; 5247 for (jo=0; jo<ncols_o; jo++) { 5248 col = cmap[*bj]; 5249 if (col >= cstart) break; 5250 *cam++ = *ba++; bj++; 5251 } 5252 /* diagonal portion of A */ 5253 ncols_d = ai[i+1] - ai[i]; 5254 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5255 /* off-diagonal portion of A */ 5256 for (j=jo; j<ncols_o; j++) { 5257 *cam++ = *ba++; bj++; 5258 } 5259 } 5260 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5261 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5262 PetscFunctionReturn(0); 5263 } 5264 5265 /*@C 5266 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5267 5268 Not Collective 5269 5270 Input Parameters: 5271 + A - the matrix 5272 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5273 - row, col - index sets of rows and columns to extract (or NULL) 5274 5275 Output Parameter: 5276 . A_loc - the local sequential matrix generated 5277 5278 Level: developer 5279 5280 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5281 5282 @*/ 5283 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5284 { 5285 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5286 PetscErrorCode ierr; 5287 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5288 IS isrowa,iscola; 5289 Mat *aloc; 5290 PetscBool match; 5291 5292 PetscFunctionBegin; 5293 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5294 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5295 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5296 if (!row) { 5297 start = A->rmap->rstart; end = A->rmap->rend; 5298 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5299 } else { 5300 isrowa = *row; 5301 } 5302 if (!col) { 5303 start = A->cmap->rstart; 5304 cmap = a->garray; 5305 nzA = a->A->cmap->n; 5306 nzB = a->B->cmap->n; 5307 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5308 ncols = 0; 5309 for (i=0; i<nzB; i++) { 5310 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5311 else break; 5312 } 5313 imark = i; 5314 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5315 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5316 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5317 } else { 5318 iscola = *col; 5319 } 5320 if (scall != MAT_INITIAL_MATRIX) { 5321 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5322 aloc[0] = *A_loc; 5323 } 5324 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5325 if (!col) { /* attach global id of condensed columns */ 5326 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5327 } 5328 *A_loc = aloc[0]; 5329 ierr = PetscFree(aloc);CHKERRQ(ierr); 5330 if (!row) { 5331 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5332 } 5333 if (!col) { 5334 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5335 } 5336 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5337 PetscFunctionReturn(0); 5338 } 5339 5340 /* 5341 * Create a sequential AIJ matrix 
based on row indices; a whole row is extracted once its index is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p = (Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd = (Mat_SeqAIJ*)(p->A)->data,*po = (Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt            owner;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscErrorCode         ierr;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr  = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we can find the relative location of each row's data */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol    = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the exact number of columns,
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr  = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is SeqAIJ, so ilocal needs to point to the proper location in its single contiguous array */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data to save memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for the diag matrix */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
  /* We want P_oth to store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use a memory-scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
  nout = 0;
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D 
\n",po->i[plocalsize],nout); 5494 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5495 /* Exchange values */ 5496 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5497 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5498 /* Stop PETSc from shrinking memory */ 5499 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5500 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5501 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5502 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5503 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5504 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5505 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5506 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5507 PetscFunctionReturn(0); 5508 } 5509 5510 /* 5511 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5512 * This supports MPIAIJ and MAIJ 5513 * */ 5514 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5515 { 5516 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5517 Mat_SeqAIJ *p_oth; 5518 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5519 IS rows,map; 5520 PetscHMapI hamp; 5521 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5522 MPI_Comm comm; 5523 PetscSF sf,osf; 5524 PetscBool has; 5525 PetscErrorCode ierr; 5526 5527 PetscFunctionBegin; 5528 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5529 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5530 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5531 * and then create a submatrix (that often is an overlapping matrix) 5532 * */ 5533 if (reuse == MAT_INITIAL_MATRIX) { 5534 /* Use a hash table to figure out unique keys */ 5535 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5536 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5537 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5538 count = 0; 5539 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5540 for (i=0;i<a->B->cmap->n;i++) { 5541 key = a->garray[i]/dof; 5542 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5543 if (!has) { 5544 mapping[i] = count; 5545 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5546 } else { 5547 /* Current 'i' has the same value the previous step */ 5548 mapping[i] = count-1; 5549 } 5550 } 5551 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5552 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5553 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5554 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5555 off = 0; 5556 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5557 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5558 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5559 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5560 /* In case, the matrix was already created but users want to recreate the matrix */ 5561 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5562 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5563 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5564 ierr = 
ISDestroy(&map);CHKERRQ(ierr);
    ierr = ISDestroy(&rows);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update the values using the SF objects
     * that were attached to the matrix earlier.
     * */
    ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
    ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
    if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  PetscErrorCode           ierr;
  Mat_MPIAIJ               *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ               *b_oth;
  VecScatter               ctx;
  MPI_Comm                 comm;
  const PetscMPIInt        *rprocs,*sprocs;
  const PetscInt           *srow,*rstarts,*sstarts;
  PetscInt                 *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
  PetscInt                 i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
  PetscScalar              *b_otha,*bufa,*bufA,*vals = NULL;
  MPI_Request              *rwaits = NULL,*swaits = NULL;
  MPI_Status               rstatus;
  PetscMPIInt              size,tag,rank,nsends_mpi,nrecvs_mpi;
  PETSC_UNUSED PetscMPIInt jj;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  if (size == 1) {
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
  ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
    for (i=0; i<nrecvs; i++) {
      rowlen =
rvalues + rstarts[i]*rbs; 5728 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5729 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5730 } 5731 5732 /* pack the outgoing message */ 5733 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5734 5735 sstartsj[0] = 0; 5736 rstartsj[0] = 0; 5737 len = 0; /* total length of j or a array to be sent */ 5738 if (nsends) { 5739 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5740 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5741 } 5742 for (i=0; i<nsends; i++) { 5743 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5744 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5745 for (j=0; j<nrows; j++) { 5746 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5747 for (l=0; l<sbs; l++) { 5748 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5749 5750 rowlen[j*sbs+l] = ncols; 5751 5752 len += ncols; 5753 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5754 } 5755 k++; 5756 } 5757 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5758 5759 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5760 } 5761 /* recvs and sends of i-array are completed */ 5762 i = nrecvs; 5763 while (i--) { 5764 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5765 } 5766 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5767 ierr = PetscFree(svalues);CHKERRQ(ierr); 5768 5769 /* allocate buffers for sending j and a arrays */ 5770 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5771 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5772 5773 /* create i-array of B_oth */ 5774 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5775 5776 b_othi[0] = 0; 5777 len = 0; /* total length of j or a array to be received */ 5778 k = 0; 5779 for (i=0; i<nrecvs; i++) { 5780 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5781 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5782 for (j=0; j<nrows; j++) { 5783 b_othi[k+1] = b_othi[k] + rowlen[j]; 5784 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5785 k++; 5786 } 5787 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5788 } 5789 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5790 5791 /* allocate space for j and a arrrays of B_oth */ 5792 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5793 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5794 5795 /* j-array */ 5796 /*---------*/ 5797 /* post receives of j-array */ 5798 for (i=0; i<nrecvs; i++) { 5799 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5800 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5801 } 5802 5803 /* pack the outgoing message j-array */ 5804 if (nsends) k = sstarts[0]; 5805 for (i=0; i<nsends; i++) { 5806 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5807 bufJ = bufj+sstartsj[i]; 5808 for (j=0; j<nrows; j++) { 5809 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5810 for (ll=0; ll<sbs; ll++) { 5811 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5812 for (l=0; l<ncols; l++) { 5813 *bufJ++ = cols[l]; 5814 } 5815 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5816 } 5817 } 5818 ierr = 
    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* the send buffer is not saved for reuse, so free it here */
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }

  ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

   Input Parameter:
.    A - The matrix in mpiaij format

   Output Parameters:
+    lvec - The local vector holding off-process values from the argument to a matrix-vector product
.    colmap - A map from global column index to local index into lvec
-    multScatter - A scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
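/*
   A minimal usage sketch (assumes an assembled MATMPIAIJ matrix A; error handling elided):

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ... lvec, colmap, and Mvctx are references to internals owned by A; do not destroy them ...
*/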
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_CUDA)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes C = A*B as C = (B'*A')', since computing A*B directly with A dense and B sparse is untenable

               n               p                p
            [      ]        [      ]         [      ]
          m [  A   ]  *   n [  B   ]   =   m [  C   ]
            [      ]        [      ]         [      ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
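/*
   Example (a sketch of how the kernel above is reached; assumes A is MATMPIDENSE and B is
   MATMPIAIJ with compatible layouts):

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   The product is then evaluated internally as C = (B'*A')', which needs only the supported
   sparse-times-dense multiply.
*/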
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;
  PetscBool      cisdense;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/
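/*
   A sketch of driving the hooks above through the public MatProduct API (C picks up the
   symbolic/numeric kernels registered in MatProductSetFromOptions_MPIDense_MPIAIJ_AB):

     ierr = MatProductCreate(A,B,NULL,&C);CHKERRQ(ierr);
     ierr = MatProductSetType(C,MATPRODUCT_AB);CHKERRQ(ierr);
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);
     ierr = MatProductNumeric(C);CHKERRQ(ierr);
*/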
/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
   MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix.

   MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/
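/*
   Example (a sketch): creating a matrix of this type and letting the options database choose it;
   the preallocation numbers are illustrative. Calling both preallocation routines is harmless and
   keeps the code independent of the communicator size:

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                  /* e.g. -mat_type mpiaij */
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/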
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for the "diagonal" portion of the matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for the "off-diagonal" portion of the matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
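/*
   Example (a sketch with illustrative numbers, assuming exactly two MPI ranks): each rank owns one
   row and one column of a 2x2 global matrix. As the implementation below indicates, j holds column
   indices local to the diagonal block while oj holds global column indices; all six arrays must
   outlive the matrix.

     PetscInt    i[2]  = {0,1},  j[1]  = {0};             PetscScalar a[1]  = {2.0};
     PetscInt    oi[2] = {0,1},  oj[1] = {rank ? 0 : 1};  PetscScalar oa[1] = {-1.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/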
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
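/*
   Why the redefinitions above are needed (an explanatory note): CHKERRQ() and SETERRQ() normally
   expand to a "return" of the error code, which is invalid inside matsetvaluesmpiaij_() below
   because Fortran-callable wrappers return void. Remapping them to CHKERRABORT() makes any
   failure abort on the communicator instead of returning an error code.
*/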
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;
    /* The variable below is only used in the PETSC_HAVE_DEVICE case, but we define it in all cases
     * because we cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
      if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   ba must be refreshed before ap2 is recomputed from it */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}