#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr =
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=NULL; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + 
rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = 
      MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array), but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether the PetscLogFlops() call will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr =
PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 479 rp1[_i] = col; \ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_DEVICE) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 
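  /* The locals above (and ba below) expose the internals of the diagonal block aij->A and the off-diagonal block aij->B so that the
     MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros can search and insert without re-dereferencing the
     Mat_SeqAIJ structures for every entry: columns owned by this process (cstart <= col < cend) go into A, all other columns go
     into B via the colmap/garray translation further below. */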
570 MatScalar *ba = b->a; 571 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 583 if (im[i] >= rstart && im[i] < rend) { 584 row = im[i] - rstart; 585 lastcol1 = -1; 586 rp1 = aj + ai[row]; 587 ap1 = aa + ai[row]; 588 rmax1 = aimax[row]; 589 nrow1 = ailen[row]; 590 low1 = 0; 591 high1 = nrow1; 592 lastcol2 = -1; 593 rp2 = bj + bi[row]; 594 ap2 = ba + bi[row]; 595 rmax2 = bimax[row]; 596 nrow2 = bilen[row]; 597 low2 = 0; 598 high2 = nrow2; 599 600 for (j=0; j<n; j++) { 601 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 602 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 603 if (in[j] >= cstart && in[j] < cend) { 604 col = in[j] - cstart; 605 nonew = a->nonew; 606 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 607 #if defined(PETSC_HAVE_DEVICE) 608 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 609 #endif 610 } else if (in[j] < 0) continue; 611 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 612 else { 613 if (mat->was_assembled) { 614 if (!aij->colmap) { 615 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 616 } 617 #if defined(PETSC_USE_CTABLE) 618 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 619 col--; 620 #else 621 col = aij->colmap[in[j]] - 1; 622 #endif 623 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 624 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 625 col = in[j]; 626 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 627 B = aij->B; 628 b = (Mat_SeqAIJ*)B->data; 629 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 630 rp2 = bj + bi[row]; 631 ap2 = ba + bi[row]; 632 rmax2 = bimax[row]; 633 nrow2 = bilen[row]; 634 low2 = 0; 635 high2 = nrow2; 636 bm = aij->B->rmap->n; 637 ba = b->a; 638 inserted = PETSC_FALSE; 639 } else if (col < 0) { 640 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 641 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 642 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 643 } 644 } else col = in[j]; 645 nonew = b->nonew; 646 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 647 #if defined(PETSC_HAVE_DEVICE) 648 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 649 #endif 650 } 651 } 652 } else { 653 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 654 if (!aij->donotstash) { 655 mat->assembled = PETSC_FALSE; 656 if (roworiented) { 657 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && 
                                             (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If the column is in the diagonal part */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be correct and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
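     The preallocated row offsets full_diag_i[j] and full_offd_i[j] are therefore used below as row starts, rather than the current ilen counters.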
*/ 727 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 728 PetscScalar *aa = a->a,*ba = b->a; 729 730 PetscFunctionBegin; 731 /* Iterate over all rows of the matrix */ 732 for (j=0; j<am; j++) { 733 dnz_row = onz_row = 0; 734 rowstart_offd = full_offd_i[j]; 735 rowstart_diag = full_diag_i[j]; 736 /* Iterate over all non-zero columns of the current row */ 737 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 738 /* If column is in the diagonal */ 739 if (mat_j[col] >= cstart && mat_j[col] < cend) { 740 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 741 aa[rowstart_diag+dnz_row] = mat_a[col]; 742 dnz_row++; 743 } else { /* off-diagonal entries */ 744 bj[rowstart_offd+onz_row] = mat_j[col]; 745 ba[rowstart_offd+onz_row] = mat_a[col]; 746 onz_row++; 747 } 748 } 749 ailen[j] = dnz_row; 750 bilen[j] = onz_row; 751 } 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 756 { 757 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 758 PetscErrorCode ierr; 759 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 760 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 761 762 PetscFunctionBegin; 763 for (i=0; i<m; i++) { 764 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 765 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 766 if (idxm[i] >= rstart && idxm[i] < rend) { 767 row = idxm[i] - rstart; 768 for (j=0; j<n; j++) { 769 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 770 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 771 if (idxn[j] >= cstart && idxn[j] < cend) { 772 col = idxn[j] - cstart; 773 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 774 } else { 775 if (!aij->colmap) { 776 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 777 } 778 #if defined(PETSC_USE_CTABLE) 779 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 780 col--; 781 #else 782 col = aij->colmap[idxn[j]] - 1; 783 #endif 784 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 785 else { 786 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 787 } 788 } 789 } 790 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 791 } 792 PetscFunctionReturn(0); 793 } 794 795 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 796 { 797 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 798 PetscErrorCode ierr; 799 PetscInt nstash,reallocs; 800 801 PetscFunctionBegin; 802 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 803 804 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 805 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 806 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 807 PetscFunctionReturn(0); 808 } 809 810 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 811 { 812 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 813 PetscErrorCode ierr; 814 PetscMPIInt n; 815 PetscInt i,j,rstart,ncols,flg; 816 PetscInt *row,*col; 817 PetscBool other_disassembled; 818 PetscScalar *val; 819 820 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be 
reset in disassembly */ 821 822 PetscFunctionBegin; 823 if (!aij->donotstash && !mat->nooffprocentries) { 824 while (1) { 825 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 826 if (!flg) break; 827 828 for (i=0; i<n;) { 829 /* Now identify the consecutive vals belonging to the same row */ 830 for (j=i,rstart=row[j]; j<n; j++) { 831 if (row[j] != rstart) break; 832 } 833 if (j < n) ncols = j-i; 834 else ncols = n-i; 835 /* Now assemble all these values with a single function call */ 836 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 837 i = j; 838 } 839 } 840 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 841 } 842 #if defined(PETSC_HAVE_DEVICE) 843 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 844 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 845 if (mat->boundtocpu) { 846 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 847 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 848 } 849 #endif 850 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 851 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 852 853 /* determine if any processor has disassembled, if so we must 854 also disassemble ourself, in order that we may reassemble. */ 855 /* 856 if nonzero structure of submatrix B cannot change then we know that 857 no processor disassembled thus we can skip this stuff 858 */ 859 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 860 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 861 if (mat->was_assembled && !other_disassembled) { 862 #if defined(PETSC_HAVE_DEVICE) 863 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 864 #endif 865 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 866 } 867 } 868 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 869 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 870 } 871 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 872 #if defined(PETSC_HAVE_DEVICE) 873 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 874 #endif 875 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 876 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 877 878 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 879 880 aij->rowvalues = NULL; 881 882 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 883 884 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 885 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 886 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 887 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 888 } 889 #if defined(PETSC_HAVE_DEVICE) 890 mat->offloadmask = PETSC_OFFLOAD_BOTH; 891 #endif 892 PetscFunctionReturn(0); 893 } 894 895 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 896 { 897 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 898 PetscErrorCode ierr; 899 900 PetscFunctionBegin; 901 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 902 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } 905 906 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt 
rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 909 PetscObjectState sA, sB; 910 PetscInt *lrows; 911 PetscInt r, len; 912 PetscBool cong, lch, gch; 913 PetscErrorCode ierr; 914 915 PetscFunctionBegin; 916 /* get locally owned rows */ 917 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 918 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 919 /* fix right hand side if needed */ 920 if (x && b) { 921 const PetscScalar *xx; 922 PetscScalar *bb; 923 924 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 925 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 926 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 927 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 928 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 929 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 930 } 931 932 sA = mat->A->nonzerostate; 933 sB = mat->B->nonzerostate; 934 935 if (diag != 0.0 && cong) { 936 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 937 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 938 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 939 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 940 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 941 PetscInt nnwA, nnwB; 942 PetscBool nnzA, nnzB; 943 944 nnwA = aijA->nonew; 945 nnwB = aijB->nonew; 946 nnzA = aijA->keepnonzeropattern; 947 nnzB = aijB->keepnonzeropattern; 948 if (!nnzA) { 949 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 950 aijA->nonew = 0; 951 } 952 if (!nnzB) { 953 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 954 aijB->nonew = 0; 955 } 956 /* Must zero here before the next loop */ 957 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 958 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 959 for (r = 0; r < len; ++r) { 960 const PetscInt row = lrows[r] + A->rmap->rstart; 961 if (row >= A->cmap->N) continue; 962 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 963 } 964 aijA->nonew = nnwA; 965 aijB->nonew = nnwB; 966 } else { 967 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 968 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 969 } 970 ierr = PetscFree(lrows);CHKERRQ(ierr); 971 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 972 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 973 974 /* reduce nonzerostate */ 975 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 976 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 977 if (gch) A->nonzerostate++; 978 PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 982 { 983 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 984 PetscErrorCode ierr; 985 PetscMPIInt n = A->rmap->n; 986 PetscInt i,j,r,m,len = 0; 987 PetscInt *lrows,*owners = A->rmap->range; 988 PetscMPIInt p = 0; 989 PetscSFNode *rrows; 990 PetscSF sf; 991 const PetscScalar *xx; 992 PetscScalar *bb,*mask; 993 Vec xmask,lmask; 994 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 995 const PetscInt *aj, 
*ii,*ridx; 996 PetscScalar *aa; 997 998 PetscFunctionBegin; 999 /* Create SF where leaves are input rows and roots are owned rows */ 1000 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1001 for (r = 0; r < n; ++r) lrows[r] = -1; 1002 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1003 for (r = 0; r < N; ++r) { 1004 const PetscInt idx = rows[r]; 1005 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1006 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1007 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1008 } 1009 rrows[r].rank = p; 1010 rrows[r].index = rows[r] - owners[p]; 1011 } 1012 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1013 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1014 /* Collect flags for rows to be zeroed */ 1015 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1016 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1017 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1018 /* Compress and put in row numbers */ 1019 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1020 /* zero diagonal part of matrix */ 1021 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1022 /* handle off diagonal part of matrix */ 1023 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1026 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1031 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1032 PetscBool cong; 1033 1034 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1035 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1036 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1039 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1040 } 1041 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1042 /* remove zeroed rows of off diagonal matrix */ 1043 ii = aij->i; 1044 for (i=0; i<len; i++) { 1045 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1046 } 1047 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1048 if (aij->compressedrow.use) { 1049 m = aij->compressedrow.nrows; 1050 ii = aij->compressedrow.i; 1051 ridx = aij->compressedrow.rindex; 1052 for (i=0; i<m; i++) { 1053 n = ii[i+1] - ii[i]; 1054 aj = aij->j + ii[i]; 1055 aa = aij->a + ii[i]; 1056 1057 for (j=0; j<n; j++) { 1058 if (PetscAbsScalar(mask[*aj])) { 1059 if (b) bb[*ridx] -= *aa*xx[*aj]; 1060 *aa = 0.0; 1061 } 1062 aa++; 1063 aj++; 1064 } 1065 ridx++; 1066 } 1067 } else { /* do not use compressed row format */ 1068 m = l->B->rmap->n; 1069 for (i=0; i<m; i++) { 1070 n = ii[i+1] - ii[i]; 1071 aj = aij->j + ii[i]; 1072 aa = aij->a + ii[i]; 1073 for (j=0; j<n; j++) { 1074 if (PetscAbsScalar(mask[*aj])) { 1075 if (b) 
bb[i] -= *aa*xx[*aj]; 1076 *aa = 0.0; 1077 } 1078 aa++; 1079 aj++; 1080 } 1081 } 1082 } 1083 if (x && b) { 1084 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1085 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1086 } 1087 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1088 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1089 ierr = PetscFree(lrows);CHKERRQ(ierr); 1090 1091 /* only change matrix nonzero state if pattern was allowed to be changed */ 1092 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1093 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1094 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1095 } 1096 PetscFunctionReturn(0); 1097 } 1098 1099 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1100 { 1101 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1102 PetscErrorCode ierr; 1103 PetscInt nt; 1104 VecScatter Mvctx = a->Mvctx; 1105 1106 PetscFunctionBegin; 1107 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1108 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1109 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1111 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1117 { 1118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1127 { 1128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1129 PetscErrorCode ierr; 1130 VecScatter Mvctx = a->Mvctx; 1131 1132 PetscFunctionBegin; 1133 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1134 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1135 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1136 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1137 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1142 { 1143 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1144 PetscErrorCode ierr; 1145 1146 PetscFunctionBegin; 1147 /* do nondiagonal part */ 1148 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1149 /* do local part */ 1150 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1151 /* add partial results together */ 1152 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1153 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1154 PetscFunctionReturn(0); 1155 } 1156 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscBool lf; 1166 PetscMPIInt size; 1167 1168 PetscFunctionBegin; 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1171 ierr = 
MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1172 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1173 if (!*f) PetscFunctionReturn(0); 1174 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1175 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1176 if (size == 1) PetscFunctionReturn(0); 1177 1178 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1179 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1180 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1181 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1182 for (i=0; i<first; i++) notme[i] = i; 1183 for (i=last; i<M; i++) notme[i-last+first] = i; 1184 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1185 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1186 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1187 Aoff = Aoffs[0]; 1188 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1189 Boff = Boffs[0]; 1190 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1192 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1194 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1195 ierr = PetscFree(notme);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1200 { 1201 PetscErrorCode ierr; 1202 1203 PetscFunctionBegin; 1204 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1209 { 1210 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1211 PetscErrorCode ierr; 1212 1213 PetscFunctionBegin; 1214 /* do nondiagonal part */ 1215 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1216 /* do local part */ 1217 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1218 /* add partial results together */ 1219 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1220 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1221 PetscFunctionReturn(0); 1222 } 1223 1224 /* 1225 This only works correctly for square matrices where the subblock A->A is the 1226 diagonal block 1227 */ 1228 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1229 { 1230 PetscErrorCode ierr; 1231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1232 1233 PetscFunctionBegin; 1234 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1235 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1236 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1241 { 1242 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1243 PetscErrorCode ierr; 1244 1245 PetscFunctionBegin; 1246 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1247 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 
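  /* when logging is enabled, record the final global dimensions with the object before its data structures are freed below */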
PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined(PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1273 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1274 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1275 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1276 1277 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1278 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1279 1280 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1290 #if defined(PETSC_HAVE_CUDA) 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1292 #endif 1293 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1295 #endif 1296 #if defined(PETSC_HAVE_ELEMENTAL) 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1298 #endif 1299 #if defined(PETSC_HAVE_SCALAPACK) 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1301 #endif 1302 #if defined(PETSC_HAVE_HYPRE) 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1305 #endif 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1311 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1312 #if defined(PETSC_HAVE_MKL_SPARSE) 1313 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1314 #endif 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1322 { 1323 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1324 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1325 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1326 const PetscInt *garray = aij->garray; 1327 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1328 PetscInt *rowlens; 1329 PetscInt *colidxs; 1330 PetscScalar *matvals; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1335 1336 M = mat->rmap->N; 1337 N = mat->cmap->N; 1338 m = mat->rmap->n; 1339 rs = mat->rmap->rstart; 1340 cs = mat->cmap->rstart; 1341 nz = A->nz + B->nz; 1342 1343 /* write matrix header */ 1344 header[0] = MAT_FILE_CLASSID; 1345 header[1] = M; header[2] = N; header[3] = nz; 1346 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1347 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1348 1349 /* fill in and store row lengths */ 1350 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1351 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1352 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1353 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1354 1355 /* fill in and store column indices */ 1356 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1357 for (cnt=0, i=0; i<m; i++) { 1358 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1359 if (garray[B->j[jb]] > cs) break; 1360 colidxs[cnt++] = garray[B->j[jb]]; 1361 } 1362 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1363 colidxs[cnt++] = A->j[ja] + cs; 1364 for (; jb<B->i[i+1]; jb++) 1365 colidxs[cnt++] = garray[B->j[jb]]; 1366 } 1367 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1368 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1369 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1370 1371 /* fill in and store nonzero values */ 1372 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1373 for (cnt=0, i=0; i<m; i++) { 1374 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1375 if (garray[B->j[jb]] > cs) break; 1376 matvals[cnt++] = B->a[jb]; 1377 } 1378 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1379 matvals[cnt++] = A->a[ja]; 1380 for (; jb<B->i[i+1]; jb++) 1381 matvals[cnt++] = B->a[jb]; 1382 } 1383 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1384 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1385 ierr = PetscFree(matvals);CHKERRQ(ierr); 1386 1387 /* write block size option to the viewer's .info file */ 1388 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #include <petscdraw.h> 1393 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1394 { 
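  /* Dispatch on the viewer: the ASCII info formats below print per-process load and nonzero summaries, binary output is delegated to
     MatView_MPIAIJ_Binary() (or directly to the diagonal block when there is a single process), and the remaining ASCII/draw cases
     gather the entire matrix onto the first process and view it there through a sub-viewer. */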
1395 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1396 PetscErrorCode ierr; 1397 PetscMPIInt rank = aij->rank,size = aij->size; 1398 PetscBool isdraw,iascii,isbinary; 1399 PetscViewer sviewer; 1400 PetscViewerFormat format; 1401 1402 PetscFunctionBegin; 1403 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1404 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1405 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1406 if (iascii) { 1407 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1408 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1409 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1410 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1411 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1412 for (i=0; i<(PetscInt)size; i++) { 1413 nmax = PetscMax(nmax,nz[i]); 1414 nmin = PetscMin(nmin,nz[i]); 1415 navg += nz[i]; 1416 } 1417 ierr = PetscFree(nz);CHKERRQ(ierr); 1418 navg = navg/size; 1419 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1423 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1424 MatInfo info; 1425 PetscBool inodes; 1426 1427 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1428 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1429 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1430 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1431 if (!inodes) { 1432 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1433 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1434 } else { 1435 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1436 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1437 } 1438 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1439 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1440 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1441 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1442 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1443 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1444 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1445 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1446 PetscFunctionReturn(0); 1447 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1448 PetscInt inodecount,inodelimit,*inodes; 1449 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1450 if (inodes) { 1451 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1452 } else { 1453 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } 
else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1457 PetscFunctionReturn(0); 1458 } 1459 } else if (isbinary) { 1460 if (size == 1) { 1461 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1462 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1463 } else { 1464 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1465 } 1466 PetscFunctionReturn(0); 1467 } else if (iascii && size == 1) { 1468 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1469 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1470 PetscFunctionReturn(0); 1471 } else if (isdraw) { 1472 PetscDraw draw; 1473 PetscBool isnull; 1474 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1475 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1476 if (isnull) PetscFunctionReturn(0); 1477 } 1478 1479 { /* assemble the entire matrix onto first processor */ 1480 Mat A = NULL, Av; 1481 IS isrow,iscol; 1482 1483 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1484 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1485 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1486 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1487 /* The commented code uses MatCreateSubMatrices instead */ 1488 /* 1489 Mat *AA, A = NULL, Av; 1490 IS isrow,iscol; 1491 1492 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1493 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1494 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1495 if (!rank) { 1496 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1497 A = AA[0]; 1498 Av = AA[0]; 1499 } 1500 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1501 */ 1502 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1503 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1504 /* 1505 Everyone has to call to draw the matrix since the graphics waits are 1506 synchronized across all processors that share the PetscDraw object 1507 */ 1508 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1509 if (!rank) { 1510 if (((PetscObject)mat)->name) { 1511 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 } 1513 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1514 } 1515 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1516 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1517 ierr = MatDestroy(&A);CHKERRQ(ierr); 1518 } 1519 PetscFunctionReturn(0); 1520 } 1521 1522 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1523 { 1524 PetscErrorCode ierr; 1525 PetscBool iascii,isdraw,issocket,isbinary; 1526 1527 PetscFunctionBegin; 1528 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1529 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1530 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1531 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1532 if (iascii || isdraw || isbinary || issocket) { 1533 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1534 } 1535 PetscFunctionReturn(0); 1536 } 1537 1538 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal 
omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1539 { 1540 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1541 PetscErrorCode ierr; 1542 Vec bb1 = NULL; 1543 PetscBool hasop; 1544 1545 PetscFunctionBegin; 1546 if (flag == SOR_APPLY_UPPER) { 1547 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1548 PetscFunctionReturn(0); 1549 } 1550 1551 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1552 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1553 } 1554 1555 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1556 if (flag & SOR_ZERO_INITIAL_GUESS) { 1557 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1558 its--; 1559 } 1560 1561 while (its--) { 1562 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1564 1565 /* update rhs: bb1 = bb - B*x */ 1566 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1567 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1568 1569 /* local sweep */ 1570 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1571 } 1572 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1573 if (flag & SOR_ZERO_INITIAL_GUESS) { 1574 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1575 its--; 1576 } 1577 while (its--) { 1578 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 1581 /* update rhs: bb1 = bb - B*x */ 1582 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1583 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1584 1585 /* local sweep */ 1586 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1587 } 1588 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1589 if (flag & SOR_ZERO_INITIAL_GUESS) { 1590 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1591 its--; 1592 } 1593 while (its--) { 1594 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1595 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1596 1597 /* update rhs: bb1 = bb - B*x */ 1598 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1599 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1600 1601 /* local sweep */ 1602 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1603 } 1604 } else if (flag & SOR_EISENSTAT) { 1605 Vec xx1; 1606 1607 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1608 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1609 1610 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1611 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 if (!mat->diag) { 1613 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1614 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1615 } 1616 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1617 if (hasop) { 1618 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1619 } else { 1620 ierr = 
VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1621 } 1622 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1623 1624 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1625 1626 /* local sweep */ 1627 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1628 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1629 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1630 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1631 1632 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1633 1634 matin->factorerrortype = mat->A->factorerrortype; 1635 PetscFunctionReturn(0); 1636 } 1637 1638 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1639 { 1640 Mat aA,aB,Aperm; 1641 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1642 PetscScalar *aa,*ba; 1643 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1644 PetscSF rowsf,sf; 1645 IS parcolp = NULL; 1646 PetscBool done; 1647 PetscErrorCode ierr; 1648 1649 PetscFunctionBegin; 1650 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1651 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1652 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1653 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1654 1655 /* Invert row permutation to find out where my rows should go */ 1656 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1657 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1658 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1659 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1660 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1661 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1662 1663 /* Invert column permutation to find out where my columns should go */ 1664 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1665 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1666 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1667 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1668 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1669 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1670 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1671 1672 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1673 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1674 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1675 1676 /* Find out where my gcols should go */ 1677 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1678 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1679 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1680 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1681 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1682 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1683 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1684 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1685 1686 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1687 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1688 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1689 for (i=0; i<m; i++) { 1690 PetscInt row = rdest[i]; 1691 PetscMPIInt rowner; 1692 ierr = 
PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1693 for (j=ai[i]; j<ai[i+1]; j++) { 1694 PetscInt col = cdest[aj[j]]; 1695 PetscMPIInt cowner; 1696 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1697 if (rowner == cowner) dnnz[i]++; 1698 else onnz[i]++; 1699 } 1700 for (j=bi[i]; j<bi[i+1]; j++) { 1701 PetscInt col = gcdest[bj[j]]; 1702 PetscMPIInt cowner; 1703 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1704 if (rowner == cowner) dnnz[i]++; 1705 else onnz[i]++; 1706 } 1707 } 1708 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1709 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1710 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1711 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1712 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1713 1714 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1715 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1716 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1717 for (i=0; i<m; i++) { 1718 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1719 PetscInt j0,rowlen; 1720 rowlen = ai[i+1] - ai[i]; 1721 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1722 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1723 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1724 } 1725 rowlen = bi[i+1] - bi[i]; 1726 for (j0=j=0; j<rowlen; j0=j) { 1727 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1728 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1729 } 1730 } 1731 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1732 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1733 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1734 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1735 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1736 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1737 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1738 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1739 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1740 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1741 *B = Aperm; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1746 { 1747 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1748 PetscErrorCode ierr; 1749 1750 PetscFunctionBegin; 1751 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1752 if (ghosts) *ghosts = aij->garray; 1753 PetscFunctionReturn(0); 1754 } 1755 1756 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1757 { 1758 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1759 Mat A = mat->A,B = mat->B; 1760 PetscErrorCode ierr; 1761 PetscLogDouble isend[5],irecv[5]; 1762 1763 PetscFunctionBegin; 1764 info->block_size = 1.0; 1765 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1766 1767 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1768 isend[3] = info->memory; isend[4] = info->mallocs; 1769 1770 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1771 1772 isend[0] += info->nz_used; isend[1] += 
info->nz_allocated; isend[2] += info->nz_unneeded; 1773 isend[3] += info->memory; isend[4] += info->mallocs; 1774 if (flag == MAT_LOCAL) { 1775 info->nz_used = isend[0]; 1776 info->nz_allocated = isend[1]; 1777 info->nz_unneeded = isend[2]; 1778 info->memory = isend[3]; 1779 info->mallocs = isend[4]; 1780 } else if (flag == MAT_GLOBAL_MAX) { 1781 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1782 1783 info->nz_used = irecv[0]; 1784 info->nz_allocated = irecv[1]; 1785 info->nz_unneeded = irecv[2]; 1786 info->memory = irecv[3]; 1787 info->mallocs = irecv[4]; 1788 } else if (flag == MAT_GLOBAL_SUM) { 1789 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1790 1791 info->nz_used = irecv[0]; 1792 info->nz_allocated = irecv[1]; 1793 info->nz_unneeded = irecv[2]; 1794 info->memory = irecv[3]; 1795 info->mallocs = irecv[4]; 1796 } 1797 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1798 info->fill_ratio_needed = 0; 1799 info->factor_mallocs = 0; 1800 PetscFunctionReturn(0); 1801 } 1802 1803 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1804 { 1805 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1806 PetscErrorCode ierr; 1807 1808 PetscFunctionBegin; 1809 switch (op) { 1810 case MAT_NEW_NONZERO_LOCATIONS: 1811 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1812 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1813 case MAT_KEEP_NONZERO_PATTERN: 1814 case MAT_NEW_NONZERO_LOCATION_ERR: 1815 case MAT_USE_INODES: 1816 case MAT_IGNORE_ZERO_ENTRIES: 1817 MatCheckPreallocated(A,1); 1818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1819 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1820 break; 1821 case MAT_ROW_ORIENTED: 1822 MatCheckPreallocated(A,1); 1823 a->roworiented = flg; 1824 1825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1826 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1827 break; 1828 case MAT_NEW_DIAGONALS: 1829 case MAT_SORTED_FULL: 1830 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1831 break; 1832 case MAT_IGNORE_OFF_PROC_ENTRIES: 1833 a->donotstash = flg; 1834 break; 1835 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1836 case MAT_SPD: 1837 case MAT_SYMMETRIC: 1838 case MAT_STRUCTURALLY_SYMMETRIC: 1839 case MAT_HERMITIAN: 1840 case MAT_SYMMETRY_ETERNAL: 1841 break; 1842 case MAT_SUBMAT_SINGLEIS: 1843 A->submat_singleis = flg; 1844 break; 1845 case MAT_STRUCTURE_ONLY: 1846 /* The option is handled directly by MatSetOption() */ 1847 break; 1848 default: 1849 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1850 } 1851 PetscFunctionReturn(0); 1852 } 1853 1854 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1855 { 1856 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1857 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1858 PetscErrorCode ierr; 1859 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1860 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1861 PetscInt *cmap,*idx_p; 1862 1863 PetscFunctionBegin; 1864 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1865 mat->getrowactive = PETSC_TRUE; 1866 1867 if (!mat->rowvalues && (idx || v)) { 1868 /* 1869 allocate enough space to hold information from the longest row. 
1870 */ 1871 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1872 PetscInt max = 1,tmp; 1873 for (i=0; i<matin->rmap->n; i++) { 1874 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1875 if (max < tmp) max = tmp; 1876 } 1877 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1878 } 1879 1880 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1881 lrow = row - rstart; 1882 1883 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1884 if (!v) {pvA = NULL; pvB = NULL;} 1885 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1886 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1887 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1888 nztot = nzA + nzB; 1889 1890 cmap = mat->garray; 1891 if (v || idx) { 1892 if (nztot) { 1893 /* Sort by increasing column numbers, assuming A and B already sorted */ 1894 PetscInt imark = -1; 1895 if (v) { 1896 *v = v_p = mat->rowvalues; 1897 for (i=0; i<nzB; i++) { 1898 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1899 else break; 1900 } 1901 imark = i; 1902 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1903 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1904 } 1905 if (idx) { 1906 *idx = idx_p = mat->rowindices; 1907 if (imark > -1) { 1908 for (i=0; i<imark; i++) { 1909 idx_p[i] = cmap[cworkB[i]]; 1910 } 1911 } else { 1912 for (i=0; i<nzB; i++) { 1913 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1914 else break; 1915 } 1916 imark = i; 1917 } 1918 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1919 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1920 } 1921 } else { 1922 if (idx) *idx = NULL; 1923 if (v) *v = NULL; 1924 } 1925 } 1926 *nz = nztot; 1927 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1928 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1929 PetscFunctionReturn(0); 1930 } 1931 1932 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1933 { 1934 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1935 1936 PetscFunctionBegin; 1937 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1938 aij->getrowactive = PETSC_FALSE; 1939 PetscFunctionReturn(0); 1940 } 1941 1942 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1943 { 1944 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1945 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1946 PetscErrorCode ierr; 1947 PetscInt i,j,cstart = mat->cmap->rstart; 1948 PetscReal sum = 0.0; 1949 MatScalar *v; 1950 1951 PetscFunctionBegin; 1952 if (aij->size == 1) { 1953 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1954 } else { 1955 if (type == NORM_FROBENIUS) { 1956 v = amat->a; 1957 for (i=0; i<amat->nz; i++) { 1958 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1959 } 1960 v = bmat->a; 1961 for (i=0; i<bmat->nz; i++) { 1962 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1963 } 1964 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1965 *norm = PetscSqrtReal(*norm); 1966 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1967 } else if (type == NORM_1) { /* max column norm */ 1968 PetscReal *tmp,*tmp2; 1969 PetscInt *jj,*garray = aij->garray; 1970 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1971 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1972 *norm = 0.0; 1973 v = amat->a; jj = amat->j; 1974 for (j=0; j<amat->nz; j++) { 1975 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1976 } 1977 v = bmat->a; jj = bmat->j; 1978 for (j=0; j<bmat->nz; j++) { 1979 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1980 } 1981 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1982 for (j=0; j<mat->cmap->N; j++) { 1983 if (tmp2[j] > *norm) *norm = tmp2[j]; 1984 } 1985 ierr = PetscFree(tmp);CHKERRQ(ierr); 1986 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1987 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1988 } else if (type == NORM_INFINITY) { /* max row norm */ 1989 PetscReal ntemp = 0.0; 1990 for (j=0; j<aij->A->rmap->n; j++) { 1991 v = amat->a + amat->i[j]; 1992 sum = 0.0; 1993 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1994 sum += PetscAbsScalar(*v); v++; 1995 } 1996 v = bmat->a + bmat->i[j]; 1997 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1998 sum += PetscAbsScalar(*v); v++; 1999 } 2000 if (sum > ntemp) ntemp = sum; 2001 } 2002 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2003 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2004 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2005 } 2006 PetscFunctionReturn(0); 2007 } 2008 2009 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2010 { 2011 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2012 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2013 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2014 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2015 PetscErrorCode ierr; 2016 Mat B,A_diag,*B_diag; 2017 const MatScalar *array; 2018 2019 PetscFunctionBegin; 2020 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2021 ai = Aloc->i; aj = Aloc->j; 2022 bi = Bloc->i; bj = Bloc->j; 2023 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2024 PetscInt *d_nnz,*g_nnz,*o_nnz; 2025 PetscSFNode *oloc; 2026 PETSC_UNUSED PetscSF sf; 2027 2028 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2029 /* compute d_nnz for preallocation */ 2030 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2031 for (i=0; i<ai[ma]; i++) { 2032 d_nnz[aj[i]]++; 2033 } 2034 /* compute local off-diagonal contributions */ 2035 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2036 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2037 /* map those to global */ 2038 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2039 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2040 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2041 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2042 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2043 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2044 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2045 2046 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2047 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2048 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2049 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2050 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2051 ierr = 
PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2052 } else { 2053 B = *matout; 2054 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2055 } 2056 2057 b = (Mat_MPIAIJ*)B->data; 2058 A_diag = a->A; 2059 B_diag = &b->A; 2060 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2061 A_diag_ncol = A_diag->cmap->N; 2062 B_diag_ilen = sub_B_diag->ilen; 2063 B_diag_i = sub_B_diag->i; 2064 2065 /* Set ilen for diagonal of B */ 2066 for (i=0; i<A_diag_ncol; i++) { 2067 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2068 } 2069 2070 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2071 very quickly (=without using MatSetValues), because all writes are local. */ 2072 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2073 2074 /* copy over the B part */ 2075 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2076 array = Bloc->a; 2077 row = A->rmap->rstart; 2078 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2079 cols_tmp = cols; 2080 for (i=0; i<mb; i++) { 2081 ncol = bi[i+1]-bi[i]; 2082 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2083 row++; 2084 array += ncol; cols_tmp += ncol; 2085 } 2086 ierr = PetscFree(cols);CHKERRQ(ierr); 2087 2088 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2089 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2090 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2091 *matout = B; 2092 } else { 2093 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2094 } 2095 PetscFunctionReturn(0); 2096 } 2097 2098 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2099 { 2100 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2101 Mat a = aij->A,b = aij->B; 2102 PetscErrorCode ierr; 2103 PetscInt s1,s2,s3; 2104 2105 PetscFunctionBegin; 2106 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2107 if (rr) { 2108 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2109 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2110 /* Overlap communication with computation. 
*/ 2111 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2112 } 2113 if (ll) { 2114 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2115 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2116 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2117 } 2118 /* scale the diagonal block */ 2119 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2120 2121 if (rr) { 2122 /* Do a scatter end and then right scale the off-diagonal block */ 2123 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2124 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2125 } 2126 PetscFunctionReturn(0); 2127 } 2128 2129 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2130 { 2131 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2132 PetscErrorCode ierr; 2133 2134 PetscFunctionBegin; 2135 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2136 PetscFunctionReturn(0); 2137 } 2138 2139 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2140 { 2141 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2142 Mat a,b,c,d; 2143 PetscBool flg; 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 a = matA->A; b = matA->B; 2148 c = matB->A; d = matB->B; 2149 2150 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2151 if (flg) { 2152 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2153 } 2154 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2155 PetscFunctionReturn(0); 2156 } 2157 2158 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2159 { 2160 PetscErrorCode ierr; 2161 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2162 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2163 2164 PetscFunctionBegin; 2165 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2166 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2167 /* because of the column compression in the off-processor part of the matrix a->B, 2168 the number of columns in a->B and b->B may be different, hence we cannot call 2169 the MatCopy() directly on the two parts. If need be, we can provide a more 2170 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2171 then copying the submatrices */ 2172 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2173 } else { 2174 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2175 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2176 } 2177 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2178 PetscFunctionReturn(0); 2179 } 2180 2181 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2182 { 2183 PetscErrorCode ierr; 2184 2185 PetscFunctionBegin; 2186 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2187 PetscFunctionReturn(0); 2188 } 2189 2190 /* 2191 Computes the number of nonzeros per row needed for preallocation when X and Y 2192 have different nonzero structure. 
2193 */ 2194 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2195 { 2196 PetscInt i,j,k,nzx,nzy; 2197 2198 PetscFunctionBegin; 2199 /* Set the number of nonzeros in the new matrix */ 2200 for (i=0; i<m; i++) { 2201 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2202 nzx = xi[i+1] - xi[i]; 2203 nzy = yi[i+1] - yi[i]; 2204 nnz[i] = 0; 2205 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2206 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2207 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2208 nnz[i]++; 2209 } 2210 for (; k<nzy; k++) nnz[i]++; 2211 } 2212 PetscFunctionReturn(0); 2213 } 2214 2215 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2216 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2217 { 2218 PetscErrorCode ierr; 2219 PetscInt m = Y->rmap->N; 2220 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2221 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2222 2223 PetscFunctionBegin; 2224 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2225 PetscFunctionReturn(0); 2226 } 2227 2228 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2229 { 2230 PetscErrorCode ierr; 2231 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2232 PetscBLASInt bnz,one=1; 2233 Mat_SeqAIJ *x,*y; 2234 2235 PetscFunctionBegin; 2236 if (str == SAME_NONZERO_PATTERN) { 2237 PetscScalar alpha = a; 2238 x = (Mat_SeqAIJ*)xx->A->data; 2239 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2240 y = (Mat_SeqAIJ*)yy->A->data; 2241 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2242 x = (Mat_SeqAIJ*)xx->B->data; 2243 y = (Mat_SeqAIJ*)yy->B->data; 2244 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2245 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2246 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2247 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2248 will be updated */ 2249 #if defined(PETSC_HAVE_DEVICE) 2250 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2251 Y->offloadmask = PETSC_OFFLOAD_CPU; 2252 } 2253 #endif 2254 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2255 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2256 } else { 2257 Mat B; 2258 PetscInt *nnz_d,*nnz_o; 2259 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2260 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2261 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2262 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2263 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2264 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2265 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2266 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2267 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2268 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2269 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2270 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2271 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2272 } 2273 PetscFunctionReturn(0); 2274 } 2275 2276 extern 
PetscErrorCode MatConjugate_SeqAIJ(Mat); 2277 2278 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2279 { 2280 #if defined(PETSC_USE_COMPLEX) 2281 PetscErrorCode ierr; 2282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2283 2284 PetscFunctionBegin; 2285 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2286 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2287 #else 2288 PetscFunctionBegin; 2289 #endif 2290 PetscFunctionReturn(0); 2291 } 2292 2293 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2294 { 2295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2300 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2305 { 2306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2307 PetscErrorCode ierr; 2308 2309 PetscFunctionBegin; 2310 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2311 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2312 PetscFunctionReturn(0); 2313 } 2314 2315 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2316 { 2317 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2318 PetscErrorCode ierr; 2319 PetscInt i,*idxb = NULL,m = A->rmap->n; 2320 PetscScalar *va,*vv; 2321 Vec vB,vA; 2322 const PetscScalar *vb; 2323 2324 PetscFunctionBegin; 2325 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2326 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2327 2328 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2329 if (idx) { 2330 for (i=0; i<m; i++) { 2331 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2332 } 2333 } 2334 2335 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2336 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2337 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2338 2339 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2340 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2341 for (i=0; i<m; i++) { 2342 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2343 vv[i] = vb[i]; 2344 if (idx) idx[i] = a->garray[idxb[i]]; 2345 } else { 2346 vv[i] = va[i]; 2347 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2348 idx[i] = a->garray[idxb[i]]; 2349 } 2350 } 2351 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2352 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2353 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2354 ierr = PetscFree(idxb);CHKERRQ(ierr); 2355 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2356 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2357 PetscFunctionReturn(0); 2358 } 2359 2360 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2361 { 2362 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2363 PetscInt m = A->rmap->n,n = A->cmap->n; 2364 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2365 PetscInt *cmap = mat->garray; 2366 PetscInt *diagIdx, *offdiagIdx; 2367 Vec diagV, offdiagV; 2368 PetscScalar *a, *diagA, *offdiagA, *ba; 2369 PetscInt r,j,col,ncols,*bi,*bj; 2370 PetscErrorCode ierr; 2371 Mat B = mat->B; 2372 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2373 2374 PetscFunctionBegin; 2375 /* When a process holds entire A and other processes have no entry */ 2376 if (A->cmap->N == n) { 2377 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2378 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2379 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2380 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2381 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2382 PetscFunctionReturn(0); 2383 } else if (n == 0) 
{ 2384 if (m) { 2385 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2386 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2387 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2388 } 2389 PetscFunctionReturn(0); 2390 } 2391 2392 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2393 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2394 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2395 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2396 2397 /* Get offdiagIdx[] for implicit 0.0 */ 2398 ba = b->a; 2399 bi = b->i; 2400 bj = b->j; 2401 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2402 for (r = 0; r < m; r++) { 2403 ncols = bi[r+1] - bi[r]; 2404 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2405 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2406 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2407 offdiagA[r] = 0.0; 2408 2409 /* Find first hole in the cmap */ 2410 for (j=0; j<ncols; j++) { 2411 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2412 if (col > j && j < cstart) { 2413 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2414 break; 2415 } else if (col > j + n && j >= cstart) { 2416 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2417 break; 2418 } 2419 } 2420 if (j == ncols && ncols < A->cmap->N - n) { 2421 /* a hole is outside compressed Bcols */ 2422 if (ncols == 0) { 2423 if (cstart) { 2424 offdiagIdx[r] = 0; 2425 } else offdiagIdx[r] = cend; 2426 } else { /* ncols > 0 */ 2427 offdiagIdx[r] = cmap[ncols-1] + 1; 2428 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2429 } 2430 } 2431 } 2432 2433 for (j=0; j<ncols; j++) { 2434 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2435 ba++; bj++; 2436 } 2437 } 2438 2439 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2440 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2441 for (r = 0; r < m; ++r) { 2442 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2443 a[r] = diagA[r]; 2444 if (idx) idx[r] = cstart + diagIdx[r]; 2445 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2446 a[r] = diagA[r]; 2447 if (idx) { 2448 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2449 idx[r] = cstart + diagIdx[r]; 2450 } else idx[r] = offdiagIdx[r]; 2451 } 2452 } else { 2453 a[r] = offdiagA[r]; 2454 if (idx) idx[r] = offdiagIdx[r]; 2455 } 2456 } 2457 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2458 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2459 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2460 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2461 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2462 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2467 { 2468 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2469 PetscInt m = A->rmap->n,n = A->cmap->n; 2470 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2471 PetscInt *cmap = mat->garray; 2472 PetscInt *diagIdx, *offdiagIdx; 2473 Vec diagV, offdiagV; 2474 PetscScalar *a, *diagA, *offdiagA, *ba; 2475 PetscInt r,j,col,ncols,*bi,*bj; 2476 PetscErrorCode ierr; 2477 Mat B = mat->B; 2478 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2479 2480 PetscFunctionBegin; 2481 /* When a process holds entire A and other processes have no entry */ 2482 if (A->cmap->N == n) { 2483 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2484 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2485 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2486 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2487 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2488 PetscFunctionReturn(0); 2489 } else if (n == 0) { 2490 if (m) { 2491 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2492 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2493 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2494 } 2495 PetscFunctionReturn(0); 2496 } 2497 2498 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2499 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2500 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2501 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2502 2503 /* Get offdiagIdx[] for implicit 0.0 */ 2504 ba = b->a; 2505 bi = b->i; 2506 bj = b->j; 2507 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 for (r = 0; r < m; r++) { 2509 ncols = bi[r+1] - bi[r]; 2510 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2511 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2512 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2513 offdiagA[r] = 0.0; 2514 2515 /* Find first hole in the cmap */ 2516 for (j=0; j<ncols; j++) { 2517 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2518 if (col > j && j < cstart) { 2519 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2520 break; 2521 } else if (col > j + n && j >= cstart) { 2522 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2523 break; 2524 } 2525 } 2526 if (j == ncols && ncols < A->cmap->N - n) { 2527 /* a hole is outside compressed Bcols */ 2528 if (ncols == 0) { 2529 if (cstart) { 2530 offdiagIdx[r] = 0; 2531 } else offdiagIdx[r] = cend; 2532 } else { /* ncols > 0 */ 2533 offdiagIdx[r] = cmap[ncols-1] + 1; 2534 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2535 } 2536 } 2537 } 2538 2539 for (j=0; j<ncols; j++) { 2540 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2541 ba++; bj++; 2542 } 2543 } 2544 2545 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2546 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2547 for (r = 0; r < m; ++r) { 2548 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2549 a[r] = diagA[r]; 2550 if (idx) idx[r] = cstart + diagIdx[r]; 2551 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2552 a[r] = diagA[r]; 2553 if (idx) { 2554 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2555 idx[r] = cstart + diagIdx[r]; 2556 } else idx[r] = offdiagIdx[r]; 2557 } 2558 } else { 2559 a[r] = offdiagA[r]; 2560 if (idx) idx[r] = offdiagIdx[r]; 2561 } 2562 } 2563 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2564 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2565 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2566 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2567 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2568 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2569 PetscFunctionReturn(0); 2570 } 2571 2572 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2573 { 2574 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2575 PetscInt m = A->rmap->n,n = A->cmap->n; 2576 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2577 PetscInt *cmap = mat->garray; 2578 PetscInt *diagIdx, *offdiagIdx; 2579 Vec diagV, 
offdiagV; 2580 PetscScalar *a, *diagA, *offdiagA, *ba; 2581 PetscInt r,j,col,ncols,*bi,*bj; 2582 PetscErrorCode ierr; 2583 Mat B = mat->B; 2584 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2585 2586 PetscFunctionBegin; 2587 /* When a process holds entire A and other processes have no entry */ 2588 if (A->cmap->N == n) { 2589 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2590 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2591 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2592 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2593 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2594 PetscFunctionReturn(0); 2595 } else if (n == 0) { 2596 if (m) { 2597 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2598 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2599 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2600 } 2601 PetscFunctionReturn(0); 2602 } 2603 2604 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2605 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2606 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2607 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2608 2609 /* Get offdiagIdx[] for implicit 0.0 */ 2610 ba = b->a; 2611 bi = b->i; 2612 bj = b->j; 2613 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2614 for (r = 0; r < m; r++) { 2615 ncols = bi[r+1] - bi[r]; 2616 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2617 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2618 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2619 offdiagA[r] = 0.0; 2620 2621 /* Find first hole in the cmap */ 2622 for (j=0; j<ncols; j++) { 2623 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2624 if (col > j && j < cstart) { 2625 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2626 break; 2627 } else if (col > j + n && j >= cstart) { 2628 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2629 break; 2630 } 2631 } 2632 if (j == ncols && ncols < A->cmap->N - n) { 2633 /* a hole is outside compressed Bcols */ 2634 if (ncols == 0) { 2635 if (cstart) { 2636 offdiagIdx[r] = 0; 2637 } else offdiagIdx[r] = cend; 2638 } else { /* ncols > 0 */ 2639 offdiagIdx[r] = cmap[ncols-1] + 1; 2640 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2641 } 2642 } 2643 } 2644 2645 for (j=0; j<ncols; j++) { 2646 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2647 ba++; bj++; 2648 } 2649 } 2650 2651 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2652 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2653 for (r = 0; r < m; ++r) { 2654 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2655 a[r] = diagA[r]; 2656 if (idx) idx[r] = cstart + diagIdx[r]; 2657 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2658 a[r] = diagA[r]; 2659 if (idx) { 2660 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2661 idx[r] = cstart + diagIdx[r]; 2662 } else idx[r] = offdiagIdx[r]; 2663 } 2664 } else { 2665 a[r] = offdiagA[r]; 2666 if (idx) idx[r] = offdiagIdx[r]; 2667 } 2668 } 2669 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2670 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2671 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2672 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2673 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2674 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2675 PetscFunctionReturn(0); 
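  /* Note on this routine and the MatGetRowMin/MatGetRowMinAbs variants above: the
     off-diagonal block B stores only the columns that actually contain nonzeros
     (compressed through garray/cmap), so a row of B with fewer than A->cmap->N - n
     entries has implicit zero columns. The "find first hole in the cmap" loops locate
     the global column of the first such implicit zero so that 0.0 can compete with the
     stored entries. Small illustration (assumed sizes, for exposition only): with
     N = 8 global columns, local columns [3,5) (cstart = 3, n = 2) and garray = {0,6},
     a row of B containing both stored entries still has implicit zeros at global
     columns 1, 2, 5 and 7, and the first of these, column 1, is what offdiagIdx
     records before the stored values are compared against it. */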
2676 } 2677 2678 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2679 { 2680 PetscErrorCode ierr; 2681 Mat *dummy; 2682 2683 PetscFunctionBegin; 2684 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2685 *newmat = *dummy; 2686 ierr = PetscFree(dummy);CHKERRQ(ierr); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2691 { 2692 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2693 PetscErrorCode ierr; 2694 2695 PetscFunctionBegin; 2696 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2697 A->factorerrortype = a->A->factorerrortype; 2698 PetscFunctionReturn(0); 2699 } 2700 2701 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2702 { 2703 PetscErrorCode ierr; 2704 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2705 2706 PetscFunctionBegin; 2707 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2708 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2709 if (x->assembled) { 2710 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2711 } else { 2712 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2713 } 2714 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2715 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2716 PetscFunctionReturn(0); 2717 } 2718 2719 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2720 { 2721 PetscFunctionBegin; 2722 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2723 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2724 PetscFunctionReturn(0); 2725 } 2726 2727 /*@ 2728 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2729 2730 Collective on Mat 2731 2732 Input Parameters: 2733 + A - the matrix 2734 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2735 2736 Level: advanced 2737 2738 @*/ 2739 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2740 { 2741 PetscErrorCode ierr; 2742 2743 PetscFunctionBegin; 2744 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2745 PetscFunctionReturn(0); 2746 } 2747 2748 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2749 { 2750 PetscErrorCode ierr; 2751 PetscBool sc = PETSC_FALSE,flg; 2752 2753 PetscFunctionBegin; 2754 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2755 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2756 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2757 if (flg) { 2758 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2759 } 2760 ierr = PetscOptionsTail();CHKERRQ(ierr); 2761 PetscFunctionReturn(0); 2762 } 2763 2764 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2765 { 2766 PetscErrorCode ierr; 2767 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2768 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2769 2770 PetscFunctionBegin; 2771 if (!Y->preallocated) { 2772 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2773 } else if (!aij->nz) { 2774 PetscInt nonew = 
aij->nonew; 2775 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2776 aij->nonew = nonew; 2777 } 2778 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2779 PetscFunctionReturn(0); 2780 } 2781 2782 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2783 { 2784 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2785 PetscErrorCode ierr; 2786 2787 PetscFunctionBegin; 2788 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2789 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2790 if (d) { 2791 PetscInt rstart; 2792 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2793 *d += rstart; 2794 2795 } 2796 PetscFunctionReturn(0); 2797 } 2798 2799 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2800 { 2801 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2802 PetscErrorCode ierr; 2803 2804 PetscFunctionBegin; 2805 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2806 PetscFunctionReturn(0); 2807 } 2808 2809 /* -------------------------------------------------------------------*/ 2810 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2811 MatGetRow_MPIAIJ, 2812 MatRestoreRow_MPIAIJ, 2813 MatMult_MPIAIJ, 2814 /* 4*/ MatMultAdd_MPIAIJ, 2815 MatMultTranspose_MPIAIJ, 2816 MatMultTransposeAdd_MPIAIJ, 2817 NULL, 2818 NULL, 2819 NULL, 2820 /*10*/ NULL, 2821 NULL, 2822 NULL, 2823 MatSOR_MPIAIJ, 2824 MatTranspose_MPIAIJ, 2825 /*15*/ MatGetInfo_MPIAIJ, 2826 MatEqual_MPIAIJ, 2827 MatGetDiagonal_MPIAIJ, 2828 MatDiagonalScale_MPIAIJ, 2829 MatNorm_MPIAIJ, 2830 /*20*/ MatAssemblyBegin_MPIAIJ, 2831 MatAssemblyEnd_MPIAIJ, 2832 MatSetOption_MPIAIJ, 2833 MatZeroEntries_MPIAIJ, 2834 /*24*/ MatZeroRows_MPIAIJ, 2835 NULL, 2836 NULL, 2837 NULL, 2838 NULL, 2839 /*29*/ MatSetUp_MPIAIJ, 2840 NULL, 2841 NULL, 2842 MatGetDiagonalBlock_MPIAIJ, 2843 NULL, 2844 /*34*/ MatDuplicate_MPIAIJ, 2845 NULL, 2846 NULL, 2847 NULL, 2848 NULL, 2849 /*39*/ MatAXPY_MPIAIJ, 2850 MatCreateSubMatrices_MPIAIJ, 2851 MatIncreaseOverlap_MPIAIJ, 2852 MatGetValues_MPIAIJ, 2853 MatCopy_MPIAIJ, 2854 /*44*/ MatGetRowMax_MPIAIJ, 2855 MatScale_MPIAIJ, 2856 MatShift_MPIAIJ, 2857 MatDiagonalSet_MPIAIJ, 2858 MatZeroRowsColumns_MPIAIJ, 2859 /*49*/ MatSetRandom_MPIAIJ, 2860 NULL, 2861 NULL, 2862 NULL, 2863 NULL, 2864 /*54*/ MatFDColoringCreate_MPIXAIJ, 2865 NULL, 2866 MatSetUnfactored_MPIAIJ, 2867 MatPermute_MPIAIJ, 2868 NULL, 2869 /*59*/ MatCreateSubMatrix_MPIAIJ, 2870 MatDestroy_MPIAIJ, 2871 MatView_MPIAIJ, 2872 NULL, 2873 NULL, 2874 /*64*/ NULL, 2875 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2876 NULL, 2877 NULL, 2878 NULL, 2879 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2880 MatGetRowMinAbs_MPIAIJ, 2881 NULL, 2882 NULL, 2883 NULL, 2884 NULL, 2885 /*75*/ MatFDColoringApply_AIJ, 2886 MatSetFromOptions_MPIAIJ, 2887 NULL, 2888 NULL, 2889 MatFindZeroDiagonals_MPIAIJ, 2890 /*80*/ NULL, 2891 NULL, 2892 NULL, 2893 /*83*/ MatLoad_MPIAIJ, 2894 MatIsSymmetric_MPIAIJ, 2895 NULL, 2896 NULL, 2897 NULL, 2898 NULL, 2899 /*89*/ NULL, 2900 NULL, 2901 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2902 NULL, 2903 NULL, 2904 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2905 NULL, 2906 NULL, 2907 NULL, 2908 MatBindToCPU_MPIAIJ, 2909 /*99*/ MatProductSetFromOptions_MPIAIJ, 2910 NULL, 2911 NULL, 2912 MatConjugate_MPIAIJ, 2913 NULL, 2914 /*104*/MatSetValuesRow_MPIAIJ, 2915 MatRealPart_MPIAIJ, 2916 MatImaginaryPart_MPIAIJ, 2917 NULL, 2918 NULL, 2919 /*109*/NULL, 2920 NULL, 2921 MatGetRowMin_MPIAIJ, 2922 NULL, 2923 
MatMissingDiagonal_MPIAIJ, 2924 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2925 NULL, 2926 MatGetGhosts_MPIAIJ, 2927 NULL, 2928 NULL, 2929 /*119*/MatMultDiagonalBlock_MPIAIJ, 2930 NULL, 2931 NULL, 2932 NULL, 2933 MatGetMultiProcBlock_MPIAIJ, 2934 /*124*/MatFindNonzeroRows_MPIAIJ, 2935 MatGetColumnNorms_MPIAIJ, 2936 MatInvertBlockDiagonal_MPIAIJ, 2937 MatInvertVariableBlockDiagonal_MPIAIJ, 2938 MatCreateSubMatricesMPI_MPIAIJ, 2939 /*129*/NULL, 2940 NULL, 2941 NULL, 2942 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2943 NULL, 2944 /*134*/NULL, 2945 NULL, 2946 NULL, 2947 NULL, 2948 NULL, 2949 /*139*/MatSetBlockSizes_MPIAIJ, 2950 NULL, 2951 NULL, 2952 MatFDColoringSetUp_MPIXAIJ, 2953 MatFindOffBlockDiagonalEntries_MPIAIJ, 2954 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2955 /*145*/NULL, 2956 NULL, 2957 NULL 2958 }; 2959 2960 /* ----------------------------------------------------------------------------------------*/ 2961 2962 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2963 { 2964 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2965 PetscErrorCode ierr; 2966 2967 PetscFunctionBegin; 2968 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2969 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 2973 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2974 { 2975 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2976 PetscErrorCode ierr; 2977 2978 PetscFunctionBegin; 2979 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2980 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2981 PetscFunctionReturn(0); 2982 } 2983 2984 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2985 { 2986 Mat_MPIAIJ *b; 2987 PetscErrorCode ierr; 2988 PetscMPIInt size; 2989 2990 PetscFunctionBegin; 2991 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2992 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2993 b = (Mat_MPIAIJ*)B->data; 2994 2995 #if defined(PETSC_USE_CTABLE) 2996 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2997 #else 2998 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2999 #endif 3000 ierr = PetscFree(b->garray);CHKERRQ(ierr); 3001 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 3002 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 3003 3004 /* Because the B will have been resized we simply destroy it and create a new one each time */ 3005 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3006 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3007 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3008 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 3009 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3010 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3011 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3012 3013 if (!B->preallocated) { 3014 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3015 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3016 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3017 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3018 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3019 } 3020 3021 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3022 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3023 B->preallocated = PETSC_TRUE; 3024 B->was_assembled = PETSC_FALSE; 3025 B->assembled = PETSC_FALSE; 3026 PetscFunctionReturn(0); 3027 } 3028 3029 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 3030 { 3031 Mat_MPIAIJ *b; 3032 PetscErrorCode ierr; 3033 3034 PetscFunctionBegin; 3035 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3036 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3037 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3038 b = (Mat_MPIAIJ*)B->data; 3039 3040 #if defined(PETSC_USE_CTABLE) 3041 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 3042 #else 3043 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 3044 #endif 3045 ierr = PetscFree(b->garray);CHKERRQ(ierr); 3046 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 3047 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 3048 3049 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 3050 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 3051 B->preallocated = PETSC_TRUE; 3052 B->was_assembled = PETSC_FALSE; 3053 B->assembled = PETSC_FALSE; 3054 PetscFunctionReturn(0); 3055 } 3056 3057 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3058 { 3059 Mat mat; 3060 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3061 PetscErrorCode ierr; 3062 3063 PetscFunctionBegin; 3064 *newmat = NULL; 3065 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3066 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3067 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3068 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3069 a = (Mat_MPIAIJ*)mat->data; 3070 3071 mat->factortype = matin->factortype; 3072 mat->assembled = matin->assembled; 3073 mat->insertmode = NOT_SET_VALUES; 3074 mat->preallocated = matin->preallocated; 3075 3076 a->size = oldmat->size; 3077 a->rank = oldmat->rank; 3078 a->donotstash = oldmat->donotstash; 3079 a->roworiented = oldmat->roworiented; 3080 a->rowindices = NULL; 3081 a->rowvalues = NULL; 3082 a->getrowactive = PETSC_FALSE; 3083 3084 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3085 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3086 3087 if (oldmat->colmap) { 3088 #if defined(PETSC_USE_CTABLE) 3089 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3090 #else 3091 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 3092 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3093 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 3094 #endif 3095 } else a->colmap = NULL; 3096 if (oldmat->garray) { 3097 PetscInt len; 3098 len = oldmat->B->cmap->n; 3099 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 3100 
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3101 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 3102 } else a->garray = NULL; 3103 3104 /* It may happen MatDuplicate is called with a non-assembled matrix 3105 In fact, MatDuplicate only requires the matrix to be preallocated 3106 This may happen inside a DMCreateMatrix_Shell */ 3107 if (oldmat->lvec) { 3108 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3109 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3110 } 3111 if (oldmat->Mvctx) { 3112 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3113 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3114 } 3115 if (oldmat->Mvctx_mpi1) { 3116 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 3117 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 3118 } 3119 3120 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3121 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3122 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3123 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3124 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3125 *newmat = mat; 3126 PetscFunctionReturn(0); 3127 } 3128 3129 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3130 { 3131 PetscBool isbinary, ishdf5; 3132 PetscErrorCode ierr; 3133 3134 PetscFunctionBegin; 3135 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3136 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3137 /* force binary viewer to load .info file if it has not yet done so */ 3138 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3139 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3140 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3141 if (isbinary) { 3142 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3143 } else if (ishdf5) { 3144 #if defined(PETSC_HAVE_HDF5) 3145 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3146 #else 3147 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3148 #endif 3149 } else { 3150 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3151 } 3152 PetscFunctionReturn(0); 3153 } 3154 3155 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3156 { 3157 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3158 PetscInt *rowidxs,*colidxs; 3159 PetscScalar *matvals; 3160 PetscErrorCode ierr; 3161 3162 PetscFunctionBegin; 3163 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3164 3165 /* read in matrix header */ 3166 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3167 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3168 M = header[1]; N = header[2]; nz = header[3]; 3169 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3170 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 3171 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3172 3173 /* set block sizes from the viewer's .info file */ 3174 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3175 /* set global sizes if not set already */ 3176 if (mat->rmap->N < 0) mat->rmap->N = M; 3177 if (mat->cmap->N < 0) mat->cmap->N = N; 3178 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3179 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3180 3181 /* check if the matrix sizes are correct */ 3182 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3183 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3184 3185 /* read in row lengths and build row indices */ 3186 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3187 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3188 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3189 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3190 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3191 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3192 /* read in column indices and matrix values */ 3193 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3194 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3195 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3196 /* store matrix indices and values */ 3197 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3198 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3199 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3200 PetscFunctionReturn(0); 3201 } 3202 3203 /* Not scalable because of ISAllGather() unless getting all columns. 
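   (Each process gathers a copy of the full set of selected column indices, so the per-process memory grows with the global size of iscol.)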
*/ 3204 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3205 { 3206 PetscErrorCode ierr; 3207 IS iscol_local; 3208 PetscBool isstride; 3209 PetscMPIInt lisstride=0,gisstride; 3210 3211 PetscFunctionBegin; 3212 /* check if we are grabbing all columns*/ 3213 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3214 3215 if (isstride) { 3216 PetscInt start,len,mstart,mlen; 3217 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3218 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3219 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3220 if (mstart == start && mlen-mstart == len) lisstride = 1; 3221 } 3222 3223 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3224 if (gisstride) { 3225 PetscInt N; 3226 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3227 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3228 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3229 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3230 } else { 3231 PetscInt cbs; 3232 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3233 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3234 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3235 } 3236 3237 *isseq = iscol_local; 3238 PetscFunctionReturn(0); 3239 } 3240 3241 /* 3242 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3243 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3244 3245 Input Parameters: 3246 mat - matrix 3247 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3248 i.e., mat->rstart <= isrow[i] < mat->rend 3249 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3250 i.e., mat->cstart <= iscol[i] < mat->cend 3251 Output Parameter: 3252 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3253 iscol_o - sequential column index set for retrieving mat->B 3254 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3255 */ 3256 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3257 { 3258 PetscErrorCode ierr; 3259 Vec x,cmap; 3260 const PetscInt *is_idx; 3261 PetscScalar *xarray,*cmaparray; 3262 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3263 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3264 Mat B=a->B; 3265 Vec lvec=a->lvec,lcmap; 3266 PetscInt i,cstart,cend,Bn=B->cmap->N; 3267 MPI_Comm comm; 3268 VecScatter Mvctx=a->Mvctx; 3269 3270 PetscFunctionBegin; 3271 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3272 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3273 3274 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3275 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3276 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3277 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3278 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3279 3280 /* Get start indices */ 3281 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3282 isstart -= ncols; 3283 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3284 3285 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3286 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3287 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3288 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3289 for (i=0; i<ncols; i++) { 3290 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3291 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3292 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3293 } 3294 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3295 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3296 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3297 3298 /* Get iscol_d */ 3299 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3300 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3301 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3302 3303 /* Get isrow_d */ 3304 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3305 rstart = mat->rmap->rstart; 3306 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3307 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3308 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3309 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3310 3311 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3312 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3313 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3314 3315 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3316 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3317 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3318 3319 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3320 3321 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3322 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3323 3324 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3325 /* off-process column indices */ 3326 count = 0; 3327 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3328 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3329 3330 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3331 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3332 for (i=0; i<Bn; i++) { 3333 if (PetscRealPart(xarray[i]) > -1.0) { 3334 idx[count] = i; /* local column index in off-diagonal part B */ 3335 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3336 count++; 3337 } 3338 } 3339 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3340 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3341 3342 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3343 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3344 3345 ierr = PetscFree(idx);CHKERRQ(ierr); 3346 *garray = cmap1; 3347 3348 ierr = VecDestroy(&x);CHKERRQ(ierr); 3349 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3350 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3351 PetscFunctionReturn(0); 3352 } 3353 3354 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3355 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3356 { 3357 PetscErrorCode ierr; 3358 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3359 Mat M = NULL; 3360 MPI_Comm comm; 3361 IS iscol_d,isrow_d,iscol_o; 3362 Mat Asub = NULL,Bsub = NULL; 3363 PetscInt n; 3364 3365 PetscFunctionBegin; 3366 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3367 3368 if (call == MAT_REUSE_MATRIX) { 3369 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3370 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3371 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3372 3373 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3374 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3375 3376 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3377 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3378 3379 /* Update diagonal and off-diagonal portions of submat */ 3380 asub = (Mat_MPIAIJ*)(*submat)->data; 3381 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3382 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3383 if (n) { 3384 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3385 } 3386 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3387 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3388 3389 } else { /* call == MAT_INITIAL_MATRIX) */ 3390 const PetscInt *garray; 3391 PetscInt BsubN; 3392 3393 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3394 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3395 3396 /* Create local submatrices Asub and Bsub */ 3397 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3398 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3399 3400 /* Create submatrix M */ 3401 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3402 3403 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3404 asub = (Mat_MPIAIJ*)M->data; 3405 3406 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3407 n = asub->B->cmap->N; 3408 if (BsubN > n) { 3409 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3410 const PetscInt *idx; 3411 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3412 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3413 3414 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3415 j = 0; 3416 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3417 for (i=0; i<n; i++) { 3418 if (j >= BsubN) break; 3419 while (subgarray[i] > garray[j]) j++; 3420 3421 if (subgarray[i] == garray[j]) { 3422 idx_new[i] = idx[j++]; 3423 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3424 } 3425 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3426 3427 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3428 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3429 3430 } else if (BsubN < n) { 3431 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3432 } 3433 3434 ierr = PetscFree(garray);CHKERRQ(ierr); 3435 *submat = M; 3436 3437 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3438 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3439 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3440 3441 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3442 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3443 3444 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3445 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3446 } 3447 PetscFunctionReturn(0); 3448 } 3449 3450 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3451 { 3452 PetscErrorCode ierr; 3453 IS iscol_local=NULL,isrow_d; 3454 PetscInt csize; 3455 PetscInt n,i,j,start,end; 3456 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3457 MPI_Comm comm; 3458 3459 PetscFunctionBegin; 3460 /* If isrow has same processor distribution as mat, 3461 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3462 if (call == MAT_REUSE_MATRIX) { 3463 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3464 if (isrow_d) { 3465 sameRowDist = PETSC_TRUE; 3466 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3467 } else { 3468 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3469 if (iscol_local) { 3470 sameRowDist = PETSC_TRUE; 3471 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3472 } 3473 } 3474 } else { 3475 /* Check if isrow has same processor distribution as mat */ 3476 sameDist[0] = 
PETSC_FALSE; 3477 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3478 if (!n) { 3479 sameDist[0] = PETSC_TRUE; 3480 } else { 3481 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3482 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3483 if (i >= start && j < end) { 3484 sameDist[0] = PETSC_TRUE; 3485 } 3486 } 3487 3488 /* Check if iscol has same processor distribution as mat */ 3489 sameDist[1] = PETSC_FALSE; 3490 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3491 if (!n) { 3492 sameDist[1] = PETSC_TRUE; 3493 } else { 3494 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3495 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3496 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3497 } 3498 3499 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3500 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3501 sameRowDist = tsameDist[0]; 3502 } 3503 3504 if (sameRowDist) { 3505 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3506 /* isrow and iscol have same processor distribution as mat */ 3507 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3508 PetscFunctionReturn(0); 3509 } else { /* sameRowDist */ 3510 /* isrow has same processor distribution as mat */ 3511 if (call == MAT_INITIAL_MATRIX) { 3512 PetscBool sorted; 3513 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3514 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3515 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3516 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3517 3518 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3519 if (sorted) { 3520 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3521 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3522 PetscFunctionReturn(0); 3523 } 3524 } else { /* call == MAT_REUSE_MATRIX */ 3525 IS iscol_sub; 3526 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3527 if (iscol_sub) { 3528 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3529 PetscFunctionReturn(0); 3530 } 3531 } 3532 } 3533 } 3534 3535 /* General case: iscol -> iscol_local which has global size of iscol */ 3536 if (call == MAT_REUSE_MATRIX) { 3537 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3538 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3539 } else { 3540 if (!iscol_local) { 3541 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3542 } 3543 } 3544 3545 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3546 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3547 3548 if (call == MAT_INITIAL_MATRIX) { 3549 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3550 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3551 } 3552 PetscFunctionReturn(0); 3553 } 3554 3555 /*@C 3556 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3557 and "off-diagonal" part of the matrix in CSR format. 3558 3559 Collective 3560 3561 Input Parameters: 3562 + comm - MPI communicator 3563 . 
A - "diagonal" portion of matrix 3564 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3565 - garray - global index of B columns 3566 3567 Output Parameter: 3568 . mat - the matrix, with input A as its local diagonal matrix 3569 Level: advanced 3570 3571 Notes: 3572 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3573 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3574 3575 .seealso: MatCreateMPIAIJWithSplitArrays() 3576 @*/ 3577 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3578 { 3579 PetscErrorCode ierr; 3580 Mat_MPIAIJ *maij; 3581 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3582 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3583 PetscScalar *oa=b->a; 3584 Mat Bnew; 3585 PetscInt m,n,N; 3586 3587 PetscFunctionBegin; 3588 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3589 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3590 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3591 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3592 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3593 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3594 3595 /* Get global columns of mat */ 3596 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3597 3598 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3599 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3600 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3601 maij = (Mat_MPIAIJ*)(*mat)->data; 3602 3603 (*mat)->preallocated = PETSC_TRUE; 3604 3605 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3606 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3607 3608 /* Set A as diagonal portion of *mat */ 3609 maij->A = A; 3610 3611 nz = oi[m]; 3612 for (i=0; i<nz; i++) { 3613 col = oj[i]; 3614 oj[i] = garray[col]; 3615 } 3616 3617 /* Set Bnew as off-diagonal portion of *mat */ 3618 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3619 bnew = (Mat_SeqAIJ*)Bnew->data; 3620 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3621 maij->B = Bnew; 3622 3623 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3624 3625 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3626 b->free_a = PETSC_FALSE; 3627 b->free_ij = PETSC_FALSE; 3628 ierr = MatDestroy(&B);CHKERRQ(ierr); 3629 3630 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3631 bnew->free_a = PETSC_TRUE; 3632 bnew->free_ij = PETSC_TRUE; 3633 3634 /* condense columns of maij->B */ 3635 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3636 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3637 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3638 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3639 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3640 PetscFunctionReturn(0); 3641 } 3642 3643 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3644 
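/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() above (the names Ad, Ao, bgarray and C
   are illustrative only): each process supplies its diagonal block Ad, its off-diagonal block Ao
   (whose column indices are local to Ao), and the map bgarray from Ao's local columns to global
   columns,

     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,bgarray,&C);CHKERRQ(ierr);

   Afterwards Ad belongs to C and Ao has been destroyed, so the caller must not use or destroy
   either of them; MatCreateSubMatrix_MPIAIJ_SameRowColDist() above assembles its result this way.
*/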
3645 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3646 { 3647 PetscErrorCode ierr; 3648 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3649 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3650 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3651 Mat M,Msub,B=a->B; 3652 MatScalar *aa; 3653 Mat_SeqAIJ *aij; 3654 PetscInt *garray = a->garray,*colsub,Ncols; 3655 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3656 IS iscol_sub,iscmap; 3657 const PetscInt *is_idx,*cmap; 3658 PetscBool allcolumns=PETSC_FALSE; 3659 MPI_Comm comm; 3660 3661 PetscFunctionBegin; 3662 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3663 3664 if (call == MAT_REUSE_MATRIX) { 3665 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3666 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3667 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3668 3669 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3670 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3671 3672 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3673 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3674 3675 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3676 3677 } else { /* call == MAT_INITIAL_MATRIX) */ 3678 PetscBool flg; 3679 3680 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3681 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3682 3683 /* (1) iscol -> nonscalable iscol_local */ 3684 /* Check for special case: each processor gets entire matrix columns */ 3685 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3686 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3687 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3688 if (allcolumns) { 3689 iscol_sub = iscol_local; 3690 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3691 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3692 3693 } else { 3694 /* (2) iscol_local -> iscol_sub and iscmap. 
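       Here iscol_sub holds the selected global columns that are either owned by this process (diagonal block) or present in its off-diagonal part, and iscmap gives, for each of them, its column position in the resulting submatrix.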
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3695 PetscInt *idx,*cmap1,k; 3696 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3697 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3698 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3699 count = 0; 3700 k = 0; 3701 for (i=0; i<Ncols; i++) { 3702 j = is_idx[i]; 3703 if (j >= cstart && j < cend) { 3704 /* diagonal part of mat */ 3705 idx[count] = j; 3706 cmap1[count++] = i; /* column index in submat */ 3707 } else if (Bn) { 3708 /* off-diagonal part of mat */ 3709 if (j == garray[k]) { 3710 idx[count] = j; 3711 cmap1[count++] = i; /* column index in submat */ 3712 } else if (j > garray[k]) { 3713 while (j > garray[k] && k < Bn-1) k++; 3714 if (j == garray[k]) { 3715 idx[count] = j; 3716 cmap1[count++] = i; /* column index in submat */ 3717 } 3718 } 3719 } 3720 } 3721 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3722 3723 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3724 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3725 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3726 3727 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3728 } 3729 3730 /* (3) Create sequential Msub */ 3731 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3732 } 3733 3734 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3735 aij = (Mat_SeqAIJ*)(Msub)->data; 3736 ii = aij->i; 3737 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3738 3739 /* 3740 m - number of local rows 3741 Ncols - number of columns (same on all processors) 3742 rstart - first row in new global matrix generated 3743 */ 3744 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3745 3746 if (call == MAT_INITIAL_MATRIX) { 3747 /* (4) Create parallel newmat */ 3748 PetscMPIInt rank,size; 3749 PetscInt csize; 3750 3751 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3752 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3753 3754 /* 3755 Determine the number of non-zeros in the diagonal and off-diagonal 3756 portions of the matrix in order to do correct preallocation 3757 */ 3758 3759 /* first get start and end of "diagonal" columns */ 3760 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3761 if (csize == PETSC_DECIDE) { 3762 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3763 if (mglobal == Ncols) { /* square matrix */ 3764 nlocal = m; 3765 } else { 3766 nlocal = Ncols/size + ((Ncols % size) > rank); 3767 } 3768 } else { 3769 nlocal = csize; 3770 } 3771 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3772 rstart = rend - nlocal; 3773 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3774 3775 /* next, compute all the lengths */ 3776 jj = aij->j; 3777 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3778 olens = dlens + m; 3779 for (i=0; i<m; i++) { 3780 jend = ii[i+1] - ii[i]; 3781 olen = 0; 3782 dlen = 0; 3783 for (j=0; j<jend; j++) { 3784 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3785 else dlen++; 3786 jj++; 3787 } 3788 olens[i] = olen; 3789 dlens[i] = dlen; 3790 } 3791 3792 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3793 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3794 3795 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3796 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3797 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3798 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3799 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3800 ierr = PetscFree(dlens);CHKERRQ(ierr); 3801 3802 } else { /* call == MAT_REUSE_MATRIX */ 3803 M = *newmat; 3804 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3805 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3806 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3807 /* 3808 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3809 rather than the slower MatSetValues(). 3810 */ 3811 M->was_assembled = PETSC_TRUE; 3812 M->assembled = PETSC_FALSE; 3813 } 3814 3815 /* (5) Set values of Msub to *newmat */ 3816 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3817 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3818 3819 jj = aij->j; 3820 aa = aij->a; 3821 for (i=0; i<m; i++) { 3822 row = rstart + i; 3823 nz = ii[i+1] - ii[i]; 3824 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3825 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3826 jj += nz; aa += nz; 3827 } 3828 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3829 3830 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3831 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3832 3833 ierr = PetscFree(colsub);CHKERRQ(ierr); 3834 3835 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3836 if (call == MAT_INITIAL_MATRIX) { 3837 *newmat = M; 3838 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3839 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3840 3841 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3842 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3843 3844 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3845 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3846 3847 if (iscol_local) { 3848 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3849 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3850 } 3851 } 3852 PetscFunctionReturn(0); 3853 } 3854 3855 /* 3856 Not great since it makes two copies of the submatrix, first an SeqAIJ 3857 in local and then by concatenating the local matrices the end result. 3858 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3859 3860 Note: This requires a sequential iscol with all indices. 
3861 */ 3862 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3863 { 3864 PetscErrorCode ierr; 3865 PetscMPIInt rank,size; 3866 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3867 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3868 Mat M,Mreuse; 3869 MatScalar *aa,*vwork; 3870 MPI_Comm comm; 3871 Mat_SeqAIJ *aij; 3872 PetscBool colflag,allcolumns=PETSC_FALSE; 3873 3874 PetscFunctionBegin; 3875 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3876 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3877 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3878 3879 /* Check for special case: each processor gets entire matrix columns */ 3880 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3881 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3882 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3883 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3884 3885 if (call == MAT_REUSE_MATRIX) { 3886 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3887 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3888 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3889 } else { 3890 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3891 } 3892 3893 /* 3894 m - number of local rows 3895 n - number of columns (same on all processors) 3896 rstart - first row in new global matrix generated 3897 */ 3898 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3899 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3900 if (call == MAT_INITIAL_MATRIX) { 3901 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3902 ii = aij->i; 3903 jj = aij->j; 3904 3905 /* 3906 Determine the number of non-zeros in the diagonal and off-diagonal 3907 portions of the matrix in order to do correct preallocation 3908 */ 3909 3910 /* first get start and end of "diagonal" columns */ 3911 if (csize == PETSC_DECIDE) { 3912 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3913 if (mglobal == n) { /* square matrix */ 3914 nlocal = m; 3915 } else { 3916 nlocal = n/size + ((n % size) > rank); 3917 } 3918 } else { 3919 nlocal = csize; 3920 } 3921 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3922 rstart = rend - nlocal; 3923 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3924 3925 /* next, compute all the lengths */ 3926 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3927 olens = dlens + m; 3928 for (i=0; i<m; i++) { 3929 jend = ii[i+1] - ii[i]; 3930 olen = 0; 3931 dlen = 0; 3932 for (j=0; j<jend; j++) { 3933 if (*jj < rstart || *jj >= rend) olen++; 3934 else dlen++; 3935 jj++; 3936 } 3937 olens[i] = olen; 3938 dlens[i] = dlen; 3939 } 3940 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3941 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3942 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3943 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3944 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3945 ierr = PetscFree(dlens);CHKERRQ(ierr); 3946 } else { 3947 PetscInt ml,nl; 3948 3949 M = *newmat; 3950 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3951 if (ml 
!= m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3952 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3953 /* 3954 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3955 rather than the slower MatSetValues(). 3956 */ 3957 M->was_assembled = PETSC_TRUE; 3958 M->assembled = PETSC_FALSE; 3959 } 3960 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3961 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3962 ii = aij->i; 3963 jj = aij->j; 3964 aa = aij->a; 3965 for (i=0; i<m; i++) { 3966 row = rstart + i; 3967 nz = ii[i+1] - ii[i]; 3968 cwork = jj; jj += nz; 3969 vwork = aa; aa += nz; 3970 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3971 } 3972 3973 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3974 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3975 *newmat = M; 3976 3977 /* save submatrix used in processor for next request */ 3978 if (call == MAT_INITIAL_MATRIX) { 3979 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3980 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3981 } 3982 PetscFunctionReturn(0); 3983 } 3984 3985 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3986 { 3987 PetscInt m,cstart, cend,j,nnz,i,d; 3988 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3989 const PetscInt *JJ; 3990 PetscErrorCode ierr; 3991 PetscBool nooffprocentries; 3992 3993 PetscFunctionBegin; 3994 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3995 3996 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3997 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3998 m = B->rmap->n; 3999 cstart = B->cmap->rstart; 4000 cend = B->cmap->rend; 4001 rstart = B->rmap->rstart; 4002 4003 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 4004 4005 if (PetscDefined(USE_DEBUG)) { 4006 for (i=0; i<m; i++) { 4007 nnz = Ii[i+1]- Ii[i]; 4008 JJ = J + Ii[i]; 4009 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 4010 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 4011 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 4012 } 4013 } 4014 4015 for (i=0; i<m; i++) { 4016 nnz = Ii[i+1]- Ii[i]; 4017 JJ = J + Ii[i]; 4018 nnz_max = PetscMax(nnz_max,nnz); 4019 d = 0; 4020 for (j=0; j<nnz; j++) { 4021 if (cstart <= JJ[j] && JJ[j] < cend) d++; 4022 } 4023 d_nnz[i] = d; 4024 o_nnz[i] = nnz - d; 4025 } 4026 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 4027 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 4028 4029 for (i=0; i<m; i++) { 4030 ii = i + rstart; 4031 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 4032 } 4033 nooffprocentries = B->nooffprocentries; 4034 B->nooffprocentries = PETSC_TRUE; 4035 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4036 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4037 B->nooffprocentries = nooffprocentries; 4038 4039 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4040 PetscFunctionReturn(0); 4041 } 4042 4043 /*@ 4044 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4045 (the default parallel PETSc format). 4046 4047 Collective 4048 4049 Input Parameters: 4050 + B - the matrix 4051 . i - the indices into j for the start of each local row (starts with zero) 4052 . j - the column indices for each local row (starts with zero) 4053 - v - optional values in the matrix 4054 4055 Level: developer 4056 4057 Notes: 4058 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 4059 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4060 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4061 4062 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4063 4064 The format which is used for the sparse matrix input, is equivalent to a 4065 row-major ordering.. i.e for the following matrix, the input data expected is 4066 as shown 4067 4068 $ 1 0 0 4069 $ 2 0 3 P0 4070 $ ------- 4071 $ 4 5 6 P1 4072 $ 4073 $ Process0 [P0]: rows_owned=[0,1] 4074 $ i = {0,1,3} [size = nrow+1 = 2+1] 4075 $ j = {0,0,2} [size = 3] 4076 $ v = {1,2,3} [size = 3] 4077 $ 4078 $ Process1 [P1]: rows_owned=[2] 4079 $ i = {0,3} [size = nrow+1 = 1+1] 4080 $ j = {0,1,2} [size = 3] 4081 $ v = {4,5,6} [size = 3] 4082 4083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4084 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4085 @*/ 4086 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4087 { 4088 PetscErrorCode ierr; 4089 4090 PetscFunctionBegin; 4091 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4092 PetscFunctionReturn(0); 4093 } 4094 4095 /*@C 4096 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4097 (the default parallel PETSc format). For good matrix assembly performance 4098 the user should preallocate the matrix storage by setting the parameters 4099 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4100 performance can be increased by more than a factor of 50. 4101 4102 Collective 4103 4104 Input Parameters: 4105 + B - the matrix 4106 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4107 (same value is used for all local rows) 4108 . d_nnz - array containing the number of nonzeros in the various rows of the 4109 DIAGONAL portion of the local submatrix (possibly different for each row) 4110 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4111 The size of this array is equal to the number of local rows, i.e 'm'. 4112 For matrices that will be factored, you must leave room for (and set) 4113 the diagonal entry even if it is zero. 4114 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4115 submatrix (same value is used for all local rows). 4116 - o_nnz - array containing the number of nonzeros in the various rows of the 4117 OFF-DIAGONAL portion of the local submatrix (possibly different for 4118 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4119 structure. The size of this array is equal to the number 4120 of local rows, i.e 'm'. 4121 4122 If the *_nnz parameter is given then the *_nz parameter is ignored 4123 4124 The AIJ format (also called the Yale sparse matrix format or 4125 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4126 storage. The stored row and column indices begin with zero. 4127 See Users-Manual: ch_mat for details. 4128 4129 The parallel matrix is partitioned such that the first m0 rows belong to 4130 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4131 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4132 4133 The DIAGONAL portion of the local submatrix of a processor can be defined 4134 as the submatrix which is obtained by extraction the part corresponding to 4135 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4136 first row that belongs to the processor, r2 is the last row belonging to 4137 the this processor, and c1-c2 is range of indices of the local part of a 4138 vector suitable for applying the matrix to. This is an mxn matrix. In the 4139 common case of a square matrix, the row and column ranges are the same and 4140 the DIAGONAL part is also square. The remaining portion of the local 4141 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4142 4143 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4144 4145 You can call MatGetInfo() to get information on how effective the preallocation was; 4146 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4147 You can also run with the option -info and look for messages with the string 4148 malloc in them to see if additional memory allocation was needed. 4149 4150 Example usage: 4151 4152 Consider the following 8x8 matrix with 34 non-zero values, that is 4153 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4154 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4155 as follows: 4156 4157 .vb 4158 1 2 0 | 0 3 0 | 0 4 4159 Proc0 0 5 6 | 7 0 0 | 8 0 4160 9 0 10 | 11 0 0 | 12 0 4161 ------------------------------------- 4162 13 0 14 | 15 16 17 | 0 0 4163 Proc1 0 18 0 | 19 20 21 | 0 0 4164 0 0 0 | 22 23 0 | 24 0 4165 ------------------------------------- 4166 Proc2 25 26 27 | 0 0 28 | 29 0 4167 30 0 0 | 31 32 33 | 0 34 4168 .ve 4169 4170 This can be represented as a collection of submatrices as: 4171 4172 .vb 4173 A B C 4174 D E F 4175 G H I 4176 .ve 4177 4178 Where the submatrices A,B,C are owned by proc0, D,E,F are 4179 owned by proc1, G,H,I are owned by proc2. 4180 4181 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4182 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4183 The 'M','N' parameters are 8,8, and have the same values on all procs. 4184 4185 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4186 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4187 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4188 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4189 part as SeqAIJ matrices. 
For example, proc1 will store [E] as one SeqAIJ 4190 matrix and [DF] as another SeqAIJ matrix. 4191 4192 When d_nz, o_nz parameters are specified, d_nz storage elements are 4193 allocated for every row of the local diagonal submatrix, and o_nz 4194 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4195 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4196 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4197 In this case, the values of d_nz,o_nz are: 4198 .vb 4199 proc0 : d_nz = 2, o_nz = 2 4200 proc1 : d_nz = 3, o_nz = 2 4201 proc2 : d_nz = 1, o_nz = 4 4202 .ve 4203 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4204 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4205 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4206 34 values. 4207 4208 When d_nnz, o_nnz parameters are specified, the storage is specified 4209 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4210 In the above case the values for d_nnz,o_nnz are: 4211 .vb 4212 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4213 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4214 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4215 .ve 4216 Here the space allocated is the sum of all the above values, i.e. 34, and 4217 hence the pre-allocation is perfect. 4218 4219 Level: intermediate 4220 4221 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4222 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4223 @*/ 4224 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4225 { 4226 PetscErrorCode ierr; 4227 4228 PetscFunctionBegin; 4229 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4230 PetscValidType(B,1); 4231 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4232 PetscFunctionReturn(0); 4233 } 4234 4235 /*@ 4236 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4237 CSR format. 4238 4239 Collective 4240 4241 Input Parameters: 4242 + comm - MPI communicator 4243 . m - number of local rows (Cannot be PETSC_DECIDE) 4244 . n - This value should be the same as the local size used in creating the 4245 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4246 calculated if N is given) For square matrices n is almost always m. 4247 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4248 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4249 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4250 . j - column indices 4251 - a - matrix values 4252 4253 Output Parameter: 4254 . mat - the matrix 4255 4256 Level: intermediate 4257 4258 Notes: 4259 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4260 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4261 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4262 4263 The i and j indices are 0 based, and the i indices are offsets into the local j array. 4264 4265 The format used for the sparse matrix input is equivalent to a 4266 row-major ordering,
i.e for the following matrix, the input data expected is 4267 as shown 4268 4269 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4270 4271 $ 1 0 0 4272 $ 2 0 3 P0 4273 $ ------- 4274 $ 4 5 6 P1 4275 $ 4276 $ Process0 [P0]: rows_owned=[0,1] 4277 $ i = {0,1,3} [size = nrow+1 = 2+1] 4278 $ j = {0,0,2} [size = 3] 4279 $ v = {1,2,3} [size = 3] 4280 $ 4281 $ Process1 [P1]: rows_owned=[2] 4282 $ i = {0,3} [size = nrow+1 = 1+1] 4283 $ j = {0,1,2} [size = 3] 4284 $ v = {4,5,6} [size = 3] 4285 4286 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4287 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4288 @*/ 4289 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4290 { 4291 PetscErrorCode ierr; 4292 4293 PetscFunctionBegin; 4294 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4295 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4296 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4297 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4298 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4299 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4300 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4301 PetscFunctionReturn(0); 4302 } 4303 4304 /*@ 4305 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4306 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4307 4308 Collective 4309 4310 Input Parameters: 4311 + mat - the matrix 4312 . m - number of local rows (Cannot be PETSC_DECIDE) 4313 . n - This value should be the same as the local size used in creating the 4314 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4315 calculated if N is given) For square matrices n is almost always m. 4316 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4317 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4318 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4319 . 
J - column indices 4320 - v - matrix values 4321 4322 Level: intermediate 4323 4324 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4325 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4326 @*/ 4327 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4328 { 4329 PetscErrorCode ierr; 4330 PetscInt cstart,nnz,i,j; 4331 PetscInt *ld; 4332 PetscBool nooffprocentries; 4333 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4334 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4335 PetscScalar *ad = Ad->a, *ao = Ao->a; 4336 const PetscInt *Adi = Ad->i; 4337 PetscInt ldi,Iii,md; 4338 4339 PetscFunctionBegin; 4340 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4341 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4342 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4343 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4344 4345 cstart = mat->cmap->rstart; 4346 if (!Aij->ld) { 4347 /* count number of entries below block diagonal; check j < nnz before reading J[j] so we never index past the end of a row */ 4348 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4349 Aij->ld = ld; 4350 for (i=0; i<m; i++) { 4351 nnz = Ii[i+1]- Ii[i]; 4352 j = 0; 4353 while (j < nnz && J[j] < cstart) {j++;} 4354 J += nnz; 4355 ld[i] = j; 4356 } 4357 } else { 4358 ld = Aij->ld; 4359 } 4360 4361 for (i=0; i<m; i++) { 4362 nnz = Ii[i+1]- Ii[i]; 4363 Iii = Ii[i]; 4364 ldi = ld[i]; 4365 md = Adi[i+1]-Adi[i]; 4366 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4367 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4368 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4369 ad += md; 4370 ao += nnz - md; 4371 } 4372 nooffprocentries = mat->nooffprocentries; 4373 mat->nooffprocentries = PETSC_TRUE; 4374 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4375 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4376 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4377 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4378 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4379 mat->nooffprocentries = nooffprocentries; 4380 PetscFunctionReturn(0); 4381 } 4382 4383 /*@C 4384 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4385 (the default parallel PETSc format). For good matrix assembly performance 4386 the user should preallocate the matrix storage by setting the parameters 4387 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4388 performance can be increased by more than a factor of 50. 4389 4390 Collective 4391 4392 Input Parameters: 4393 + comm - MPI communicator 4394 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4395 This value should be the same as the local size used in creating the 4396 y vector for the matrix-vector product y = Ax. 4397 . n - This value should be the same as the local size used in creating the 4398 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4399 calculated if N is given) For square matrices n is almost always m. 4400 .
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4401 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4402 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4403 (same value is used for all local rows) 4404 . d_nnz - array containing the number of nonzeros in the various rows of the 4405 DIAGONAL portion of the local submatrix (possibly different for each row) 4406 or NULL, if d_nz is used to specify the nonzero structure. 4407 The size of this array is equal to the number of local rows, i.e. 'm'. 4408 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4409 submatrix (same value is used for all local rows). 4410 - o_nnz - array containing the number of nonzeros in the various rows of the 4411 OFF-DIAGONAL portion of the local submatrix (possibly different for 4412 each row) or NULL, if o_nz is used to specify the nonzero 4413 structure. The size of this array is equal to the number 4414 of local rows, i.e. 'm'. 4415 4416 Output Parameter: 4417 . A - the matrix 4418 4419 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4420 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4421 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4422 4423 Notes: 4424 If the *_nnz parameter is given then the *_nz parameter is ignored. 4425 4426 The m,n,M,N parameters specify the size of the matrix and its partitioning across 4427 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4428 storage requirements for this matrix. 4429 4430 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4431 processor then it must be used on all processors that share the object for 4432 that argument. 4433 4434 The user MUST specify either the local or global matrix dimensions 4435 (possibly both). 4436 4437 The parallel matrix is partitioned across processors such that the 4438 first m0 rows belong to process 0, the next m1 rows belong to 4439 process 1, the next m2 rows belong to process 2, etc., where 4440 m0,m1,m2,... are the values of the input parameter 'm' on each process; i.e. each processor stores 4441 values corresponding to an [m x N] submatrix. 4442 4443 The columns are logically partitioned with the n0 columns belonging 4444 to the 0th partition, the next n1 columns belonging to the next 4445 partition, etc., where n0,n1,n2,... are the values of the input parameter 'n' on each process. 4446 4447 The DIAGONAL portion of the local submatrix on any given processor 4448 is the submatrix corresponding to the rows and columns m,n 4449 owned by the given processor; i.e. the diagonal submatrix on 4450 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 4451 etc. The remaining portion of the local submatrix [m x (N-n)] 4452 constitutes the OFF-DIAGONAL portion. The example below better 4453 illustrates this concept. 4454 4455 For a square global matrix we define each processor's diagonal portion 4456 to be its local rows and the corresponding columns (a square submatrix); 4457 each processor's off-diagonal portion encompasses the remainder of the 4458 local matrix (a rectangular submatrix). 4459 4460 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 4461 4462 When calling this routine with a single process communicator, a matrix of 4463 type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4464 type of communicator, use the construction mechanism 4465 .vb 4466 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4467 .ve 4468 4474 By default, this format uses inodes (identical nodes) when possible. 4475 We search for consecutive rows with the same nonzero structure, thereby 4476 reusing matrix information to achieve increased efficiency. 4477 4478 Options Database Keys: 4479 + -mat_no_inode - Do not use inodes 4480 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4481 4482 4483 4484 Example usage: 4485 4486 Consider the following 8x8 matrix with 34 non-zero values, that is 4487 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4488 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4489 as follows 4490 4491 .vb 4492 1 2 0 | 0 3 0 | 0 4 4493 Proc0 0 5 6 | 7 0 0 | 8 0 4494 9 0 10 | 11 0 0 | 12 0 4495 ------------------------------------- 4496 13 0 14 | 15 16 17 | 0 0 4497 Proc1 0 18 0 | 19 20 21 | 0 0 4498 0 0 0 | 22 23 0 | 24 0 4499 ------------------------------------- 4500 Proc2 25 26 27 | 0 0 28 | 29 0 4501 30 0 0 | 31 32 33 | 0 34 4502 .ve 4503 4504 This can be represented as a collection of submatrices as 4505 4506 .vb 4507 A B C 4508 D E F 4509 G H I 4510 .ve 4511 4512 where the submatrices A,B,C are owned by proc0, D,E,F are 4513 owned by proc1, and G,H,I are owned by proc2. 4514 4515 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4516 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4517 The 'M','N' parameters are 8,8, and have the same values on all procs. 4518 4519 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4520 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4521 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4522 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4523 part as SeqAIJ matrices; e.g. proc1 will store [E] as one SeqAIJ 4524 matrix and [DF] as another SeqAIJ matrix. 4525 4526 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4527 allocated for every row of the local diagonal submatrix, and o_nz 4528 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4529 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4530 row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4531 In this case, the values of d_nz,o_nz are 4532 .vb 4533 proc0 : d_nz = 2, o_nz = 2 4534 proc1 : d_nz = 3, o_nz = 2 4535 proc2 : d_nz = 1, o_nz = 4 4536 .ve 4537 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4538 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4539 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4540 34 values. 4541 4542 When the d_nnz, o_nnz parameters are specified, the storage is specified 4543 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4544 In the above case the values for d_nnz,o_nnz are 4545 .vb 4546 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4547 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4548 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4549 .ve 4550 Here the space allocated is the sum of all the above values, i.e. 34, and 4551 hence pre-allocation is perfect.
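   As a concrete sketch of the d_nnz/o_nnz variant for the example above (only the proc0 call is shown; the per-row
   counts are taken from the table above, error checking is omitted, and the variable names are purely illustrative):
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};   /* per-row nonzero counts of proc0's DIAGONAL and OFF-DIAGONAL parts */
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
   Passing d_nz=2 and o_nz=2 with NULL for both arrays would instead reproduce the d_nz/o_nz allocation described above for proc0.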
4552 4553 Level: intermediate 4554 4555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4556 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4557 @*/ 4558 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4559 { 4560 PetscErrorCode ierr; 4561 PetscMPIInt size; 4562 4563 PetscFunctionBegin; 4564 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4565 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4566 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4567 if (size > 1) { 4568 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4569 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4570 } else { 4571 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4572 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4573 } 4574 PetscFunctionReturn(0); 4575 } 4576 4577 /*@C 4578 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4579 4580 Not collective 4581 4582 Input Parameter: 4583 . A - The MPIAIJ matrix 4584 4585 Output Parameters: 4586 + Ad - The local diagonal block as a SeqAIJ matrix 4587 . Ao - The local off-diagonal block as a SeqAIJ matrix 4588 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4589 4590 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4591 in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is 4592 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4593 local column numbers to global column numbers in the original matrix.
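   A minimal access sketch (assuming A is an assembled MATMPIAIJ matrix; error checking omitted):
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     /* local column j of Ao corresponds to global column colmap[j] of A */
.ve
   Ad and Ao are the internal blocks of A and are returned without an additional reference, so the caller should not destroy them.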
4594 4595 Level: intermediate 4596 4597 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4598 @*/ 4599 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4600 { 4601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4602 PetscBool flg; 4603 PetscErrorCode ierr; 4604 4605 PetscFunctionBegin; 4606 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4607 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4608 if (Ad) *Ad = a->A; 4609 if (Ao) *Ao = a->B; 4610 if (colmap) *colmap = a->garray; 4611 PetscFunctionReturn(0); 4612 } 4613 4614 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4615 { 4616 PetscErrorCode ierr; 4617 PetscInt m,N,i,rstart,nnz,Ii; 4618 PetscInt *indx; 4619 PetscScalar *values; 4620 4621 PetscFunctionBegin; 4622 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4623 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4624 PetscInt *dnz,*onz,sum,bs,cbs; 4625 4626 if (n == PETSC_DECIDE) { 4627 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4628 } 4629 /* Check sum(n) = N */ 4630 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4631 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4632 4633 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4634 rstart -= m; 4635 4636 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4637 for (i=0; i<m; i++) { 4638 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4639 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4640 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4641 } 4642 4643 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4644 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4645 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4646 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4647 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4648 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4649 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4650 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4651 } 4652 4653 /* numeric phase */ 4654 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4655 for (i=0; i<m; i++) { 4656 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4657 Ii = i + rstart; 4658 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4659 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4660 } 4661 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4662 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4663 PetscFunctionReturn(0); 4664 } 4665 4666 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4667 { 4668 PetscErrorCode ierr; 4669 PetscMPIInt rank; 4670 PetscInt m,N,i,rstart,nnz; 4671 size_t len; 4672 const PetscInt *indx; 4673 PetscViewer out; 4674 char *name; 4675 Mat B; 4676 const PetscScalar *values; 4677 4678 PetscFunctionBegin; 4679 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4680 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4681 /* Should this be the type of the diagonal block of A? 
*/ 4682 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4683 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4684 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4685 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4686 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4687 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4688 for (i=0; i<m; i++) { 4689 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4690 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4691 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4692 } 4693 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4694 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4695 4696 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4697 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4698 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4699 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4700 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4701 ierr = PetscFree(name);CHKERRQ(ierr); 4702 ierr = MatView(B,out);CHKERRQ(ierr); 4703 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4704 ierr = MatDestroy(&B);CHKERRQ(ierr); 4705 PetscFunctionReturn(0); 4706 } 4707 4708 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4709 { 4710 PetscErrorCode ierr; 4711 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4712 4713 PetscFunctionBegin; 4714 if (!merge) PetscFunctionReturn(0); 4715 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4716 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4717 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4718 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4719 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4720 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4721 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4722 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4723 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4724 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4725 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4726 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4727 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4728 ierr = PetscFree(merge);CHKERRQ(ierr); 4729 PetscFunctionReturn(0); 4730 } 4731 4732 #include <../src/mat/utils/freespace.h> 4733 #include <petscbt.h> 4734 4735 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4736 { 4737 PetscErrorCode ierr; 4738 MPI_Comm comm; 4739 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4740 PetscMPIInt size,rank,taga,*len_s; 4741 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4742 PetscInt proc,m; 4743 PetscInt **buf_ri,**buf_rj; 4744 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4745 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4746 MPI_Request *s_waits,*r_waits; 4747 MPI_Status *status; 4748 MatScalar *aa=a->a; 4749 MatScalar **abuf_r,*ba_i; 4750 Mat_Merge_SeqsToMPI *merge; 4751 PetscContainer container; 4752 4753 PetscFunctionBegin; 4754 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4755 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4756 4757 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4758 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4759 4760 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4761 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4762 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4763 4764 bi = merge->bi; 4765 bj = merge->bj; 4766 buf_ri = merge->buf_ri; 4767 buf_rj = merge->buf_rj; 4768 4769 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4770 owners = merge->rowmap->range; 4771 len_s = merge->len_s; 4772 4773 /* send and recv matrix values */ 4774 /*-----------------------------*/ 4775 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4776 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4777 4778 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4779 for (proc=0,k=0; proc<size; proc++) { 4780 if (!len_s[proc]) continue; 4781 i = owners[proc]; 4782 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4783 k++; 4784 } 4785 4786 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4787 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4788 ierr = PetscFree(status);CHKERRQ(ierr); 4789 4790 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4791 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4792 4793 /* insert mat values of mpimat */ 4794 /*----------------------------*/ 4795 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4796 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4797 4798 for (k=0; k<merge->nrecv; k++) { 4799 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4800 nrows = *(buf_ri_k[k]); 4801 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4802 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4803 } 4804 4805 /* set values of ba */ 4806 m = merge->rowmap->n; 4807 for (i=0; i<m; i++) { 4808 arow = owners[rank] + i; 4809 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4810 bnzi = bi[i+1] - bi[i]; 4811 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4812 4813 /* add local non-zero vals of this proc's seqmat into ba */ 4814 anzi = ai[arow+1] - ai[arow]; 4815 aj = a->j + ai[arow]; 4816 aa = a->a + ai[arow]; 4817 nextaj = 0; 4818 for (j=0; nextaj<anzi; j++) { 4819 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4820 ba_i[j] += aa[nextaj++]; 4821 } 4822 } 4823 4824 /* add received vals into ba */ 4825 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4826 /* i-th row */ 4827 if (i == *nextrow[k]) { 4828 anzi = *(nextai[k]+1) - *nextai[k]; 4829 aj = buf_rj[k] + *(nextai[k]); 4830 aa = abuf_r[k] + *(nextai[k]); 4831 nextaj = 0; 4832 for (j=0; nextaj<anzi; j++) { 4833 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4834 ba_i[j] += aa[nextaj++]; 4835 } 4836 } 4837 nextrow[k]++; nextai[k]++; 4838 } 4839 } 4840 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4841 } 4842 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4843 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4844 4845 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4846 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4847 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4848 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4849 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4850 PetscFunctionReturn(0); 4851 } 4852 4853 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4854 { 4855 PetscErrorCode ierr; 4856 Mat B_mpi; 4857 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4858 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4859 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4860 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4861 PetscInt len,proc,*dnz,*onz,bs,cbs; 4862 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4863 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4864 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4865 MPI_Status *status; 4866 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4867 PetscBT lnkbt; 4868 Mat_Merge_SeqsToMPI *merge; 4869 PetscContainer container; 4870 4871 PetscFunctionBegin; 4872 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4873 4874 /* make sure it is a PETSc comm */ 4875 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4876 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4878 4879 ierr = PetscNew(&merge);CHKERRQ(ierr); 4880 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4881 4882 /* determine row ownership */ 4883 /*---------------------------------------------------------*/ 4884 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4885 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4886 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4887 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4888 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4889 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4890 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4891 4892 m = merge->rowmap->n; 4893 owners = merge->rowmap->range; 4894 4895 /* determine the number of messages to send, their lengths */ 4896 /*---------------------------------------------------------*/ 4897 len_s = merge->len_s; 4898 4899 len = 0; /* length of buf_si[] */ 4900 merge->nsend = 0; 4901 for (proc=0; proc<size; proc++) { 4902 len_si[proc] = 0; 4903 if (proc == rank) { 4904 len_s[proc] = 0; 4905 } else { 4906 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4907 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4908 } 4909 if (len_s[proc]) { 4910 merge->nsend++; 4911 nrows = 0; 4912 for (i=owners[proc]; i<owners[proc+1]; i++) { 4913 if (ai[i+1] > ai[i]) nrows++; 4914 } 4915 len_si[proc] = 2*(nrows+1); 4916 len += len_si[proc]; 4917 } 4918 } 4919 4920 /* determine the number and length of messages to receive for ij-structure */ 4921 /*-------------------------------------------------------------------------*/ 4922 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4923 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4924 4925 /* post the Irecv of j-structure */ 4926 /*-------------------------------*/ 4927 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4928 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4929 4930 /* post the Isend of j-structure */ 4931 /*--------------------------------*/ 4932 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4933 4934 for (proc=0, k=0; proc<size; proc++) { 4935 if (!len_s[proc]) continue; 4936 i = owners[proc]; 4937 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4938 k++; 4939 } 4940 4941 /* receives and sends of j-structure are complete */ 4942 /*------------------------------------------------*/ 
4943 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4944 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4945 4946 /* send and recv i-structure */ 4947 /*---------------------------*/ 4948 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4949 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4950 4951 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4952 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4953 for (proc=0,k=0; proc<size; proc++) { 4954 if (!len_s[proc]) continue; 4955 /* form outgoing message for i-structure: 4956 buf_si[0]: nrows to be sent 4957 [1:nrows]: row index (global) 4958 [nrows+1:2*nrows+1]: i-structure index 4959 */ 4960 /*-------------------------------------------*/ 4961 nrows = len_si[proc]/2 - 1; 4962 buf_si_i = buf_si + nrows+1; 4963 buf_si[0] = nrows; 4964 buf_si_i[0] = 0; 4965 nrows = 0; 4966 for (i=owners[proc]; i<owners[proc+1]; i++) { 4967 anzi = ai[i+1] - ai[i]; 4968 if (anzi) { 4969 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4970 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4971 nrows++; 4972 } 4973 } 4974 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4975 k++; 4976 buf_si += len_si[proc]; 4977 } 4978 4979 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4980 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4981 4982 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4983 for (i=0; i<merge->nrecv; i++) { 4984 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4985 } 4986 4987 ierr = PetscFree(len_si);CHKERRQ(ierr); 4988 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4989 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4990 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4991 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4992 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4993 ierr = PetscFree(status);CHKERRQ(ierr); 4994 4995 /* compute a local seq matrix in each processor */ 4996 /*----------------------------------------------*/ 4997 /* allocate bi array and free space for accumulating nonzero column info */ 4998 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4999 bi[0] = 0; 5000 5001 /* create and initialize a linked list */ 5002 nlnk = N+1; 5003 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5004 5005 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5006 len = ai[owners[rank+1]] - ai[owners[rank]]; 5007 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 5008 5009 current_space = free_space; 5010 5011 /* determine symbolic info for each local row */ 5012 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 5013 5014 for (k=0; k<merge->nrecv; k++) { 5015 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5016 nrows = *buf_ri_k[k]; 5017 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5018 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 5019 } 5020 5021 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5022 len = 0; 5023 for (i=0; i<m; i++) { 5024 bnzi = 0; 5025 /* add local non-zero cols of this proc's seqmat into lnk */ 5026 arow = owners[rank] + i; 5027 anzi = 
ai[arow+1] - ai[arow]; 5028 aj = a->j + ai[arow]; 5029 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5030 bnzi += nlnk; 5031 /* add received col data into lnk */ 5032 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 5033 if (i == *nextrow[k]) { /* i-th row */ 5034 anzi = *(nextai[k]+1) - *nextai[k]; 5035 aj = buf_rj[k] + *nextai[k]; 5036 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5037 bnzi += nlnk; 5038 nextrow[k]++; nextai[k]++; 5039 } 5040 } 5041 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5042 5043 /* if free space is not available, make more free space */ 5044 if (current_space->local_remaining<bnzi) { 5045 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 5046 nspacedouble++; 5047 } 5048 /* copy data into free space, then initialize lnk */ 5049 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5050 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5051 5052 current_space->array += bnzi; 5053 current_space->local_used += bnzi; 5054 current_space->local_remaining -= bnzi; 5055 5056 bi[i+1] = bi[i] + bnzi; 5057 } 5058 5059 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5060 5061 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 5062 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5063 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5064 5065 /* create symbolic parallel matrix B_mpi */ 5066 /*---------------------------------------*/ 5067 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5068 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5069 if (n==PETSC_DECIDE) { 5070 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5071 } else { 5072 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5073 } 5074 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5075 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5076 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5077 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5078 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5079 5080 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5081 B_mpi->assembled = PETSC_FALSE; 5082 merge->bi = bi; 5083 merge->bj = bj; 5084 merge->buf_ri = buf_ri; 5085 merge->buf_rj = buf_rj; 5086 merge->coi = NULL; 5087 merge->coj = NULL; 5088 merge->owners_co = NULL; 5089 5090 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5091 5092 /* attach the supporting struct to B_mpi for reuse */ 5093 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5094 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5095 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 5096 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5097 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5098 *mpimat = B_mpi; 5099 5100 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5101 PetscFunctionReturn(0); 5102 } 5103 5104 /*@C 5105 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5106 matrices from each processor 5107 5108 Collective 5109 5110 Input Parameters: 5111 + comm - the communicators the parallel matrix will live on 5112 . seqmat - the input sequential matrices 5113 . m - number of local rows (or PETSC_DECIDE) 5114 . 
n - number of local columns (or PETSC_DECIDE) 5115 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5116 5117 Output Parameter: 5118 . mpimat - the parallel matrix generated 5119 5120 Level: advanced 5121 5122 Notes: 5123 The dimensions of the sequential matrix on each processor MUST be the same. 5124 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 5125 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5126 @*/ 5127 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5128 { 5129 PetscErrorCode ierr; 5130 PetscMPIInt size; 5131 5132 PetscFunctionBegin; 5133 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5134 if (size == 1) { 5135 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5136 if (scall == MAT_INITIAL_MATRIX) { 5137 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5138 } else { 5139 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5140 } 5141 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5142 PetscFunctionReturn(0); 5143 } 5144 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5145 if (scall == MAT_INITIAL_MATRIX) { 5146 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5147 } 5148 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5149 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5150 PetscFunctionReturn(0); 5151 } 5152 5153 /*@ 5154 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5155 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5156 with MatGetSize() 5157 5158 Not Collective 5159 5160 Input Parameters: 5161 + A - the matrix 5162 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5163 5164 Output Parameter: 5165 . A_loc - the local sequential matrix generated 5166 5167 Level: developer 5168 5169 Notes: 5170 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5171 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5172 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5173 modify the values of the returned A_loc.
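   A sketch of the create-then-reuse pattern described above (error checking omitted; A is assumed to be an assembled MATMPIAIJ matrix):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... the numerical values of A change, the nonzero pattern does not ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve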
5174 5175 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5176 5177 @*/ 5178 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5179 { 5180 PetscErrorCode ierr; 5181 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5182 Mat_SeqAIJ *mat,*a,*b; 5183 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5184 MatScalar *aa,*ba,*cam; 5185 PetscScalar *ca; 5186 PetscMPIInt size; 5187 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5188 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5189 PetscBool match; 5190 5191 PetscFunctionBegin; 5192 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5193 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5194 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 5195 if (size == 1) { 5196 if (scall == MAT_INITIAL_MATRIX) { 5197 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5198 *A_loc = mpimat->A; 5199 } else if (scall == MAT_REUSE_MATRIX) { 5200 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5201 } 5202 PetscFunctionReturn(0); 5203 } 5204 5205 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5206 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5207 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5208 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5209 aa = a->a; ba = b->a; 5210 if (scall == MAT_INITIAL_MATRIX) { 5211 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5212 ci[0] = 0; 5213 for (i=0; i<am; i++) { 5214 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5215 } 5216 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5217 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5218 k = 0; 5219 for (i=0; i<am; i++) { 5220 ncols_o = bi[i+1] - bi[i]; 5221 ncols_d = ai[i+1] - ai[i]; 5222 /* off-diagonal portion of A */ 5223 for (jo=0; jo<ncols_o; jo++) { 5224 col = cmap[*bj]; 5225 if (col >= cstart) break; 5226 cj[k] = col; bj++; 5227 ca[k++] = *ba++; 5228 } 5229 /* diagonal portion of A */ 5230 for (j=0; j<ncols_d; j++) { 5231 cj[k] = cstart + *aj++; 5232 ca[k++] = *aa++; 5233 } 5234 /* off-diagonal portion of A */ 5235 for (j=jo; j<ncols_o; j++) { 5236 cj[k] = cmap[*bj++]; 5237 ca[k++] = *ba++; 5238 } 5239 } 5240 /* put together the new matrix */ 5241 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5242 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5243 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5244 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5245 mat->free_a = PETSC_TRUE; 5246 mat->free_ij = PETSC_TRUE; 5247 mat->nonew = 0; 5248 } else if (scall == MAT_REUSE_MATRIX) { 5249 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5250 ci = mat->i; cj = mat->j; cam = mat->a; 5251 for (i=0; i<am; i++) { 5252 /* off-diagonal portion of A */ 5253 ncols_o = bi[i+1] - bi[i]; 5254 for (jo=0; jo<ncols_o; jo++) { 5255 col = cmap[*bj]; 5256 if (col >= cstart) break; 5257 *cam++ = *ba++; bj++; 5258 } 5259 /* diagonal portion of A */ 5260 ncols_d = ai[i+1] - ai[i]; 5261 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5262 /* off-diagonal portion of A */ 5263 for (j=jo; j<ncols_o; j++) { 5264 *cam++ = *ba++; bj++; 5265 } 5266 } 5267 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5268 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5269 PetscFunctionReturn(0); 5270 } 5271 5272 /*@C 5273 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5274 5275 Not Collective 5276 5277 Input Parameters: 5278 + A - the matrix 5279 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5280 - row, col - index sets of rows and columns to extract (or NULL) 5281 5282 Output Parameter: 5283 . A_loc - the local sequential matrix generated 5284 5285 Level: developer 5286 5287 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5288 5289 @*/ 5290 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5291 { 5292 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5293 PetscErrorCode ierr; 5294 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5295 IS isrowa,iscola; 5296 Mat *aloc; 5297 PetscBool match; 5298 5299 PetscFunctionBegin; 5300 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5301 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5302 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5303 if (!row) { 5304 start = A->rmap->rstart; end = A->rmap->rend; 5305 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5306 } else { 5307 isrowa = *row; 5308 } 5309 if (!col) { 5310 start = A->cmap->rstart; 5311 cmap = a->garray; 5312 nzA = a->A->cmap->n; 5313 nzB = a->B->cmap->n; 5314 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5315 ncols = 0; 5316 for (i=0; i<nzB; i++) { 5317 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5318 else break; 5319 } 5320 imark = i; 5321 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5322 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5323 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5324 } else { 5325 iscola = *col; 5326 } 5327 if (scall != MAT_INITIAL_MATRIX) { 5328 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5329 aloc[0] = *A_loc; 5330 } 5331 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5332 if (!col) { /* attach global id of condensed columns */ 5333 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5334 } 5335 *A_loc = aloc[0]; 5336 ierr = PetscFree(aloc);CHKERRQ(ierr); 5337 if (!row) { 5338 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5339 } 5340 if (!col) { 5341 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5342 } 5343 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5344 PetscFunctionReturn(0); 5345 } 5346 5347 /* 5348 * Create a sequential AIJ matrix 
based on row indices. a whole column is extracted once a row is matched. 5349 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5350 * on a global size. 5351 * */ 5352 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5353 { 5354 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5355 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5356 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5357 PetscMPIInt owner; 5358 PetscSFNode *iremote,*oiremote; 5359 const PetscInt *lrowindices; 5360 PetscErrorCode ierr; 5361 PetscSF sf,osf; 5362 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5363 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5364 MPI_Comm comm; 5365 ISLocalToGlobalMapping mapping; 5366 5367 PetscFunctionBegin; 5368 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5369 /* plocalsize is the number of roots 5370 * nrows is the number of leaves 5371 * */ 5372 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5373 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5374 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5375 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5376 for (i=0;i<nrows;i++) { 5377 /* Find a remote index and an owner for a row 5378 * The row could be local or remote 5379 * */ 5380 owner = 0; 5381 lidx = 0; 5382 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5383 iremote[i].index = lidx; 5384 iremote[i].rank = owner; 5385 } 5386 /* Create SF to communicate how many nonzero columns for each row */ 5387 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5388 /* SF will figure out the number of nonzero colunms for each row, and their 5389 * offsets 5390 * */ 5391 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5392 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5393 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5394 5395 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5396 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5397 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5398 roffsets[0] = 0; 5399 roffsets[1] = 0; 5400 for (i=0;i<plocalsize;i++) { 5401 /* diag */ 5402 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5403 /* off diag */ 5404 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5405 /* compute offsets so that we relative location for each row */ 5406 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5407 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5408 } 5409 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5410 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5411 /* 'r' means root, and 'l' means leaf */ 5412 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5413 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5414 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5415 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5416 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5417 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5418 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5419 dntotalcols = 0; 5420 ontotalcols = 0; 5421 ncol = 0; 5422 for (i=0;i<nrows;i++) { 5423 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5424 ncol = PetscMax(pnnz[i],ncol); 5425 /* diag */ 5426 dntotalcols += nlcols[i*2+0]; 5427 /* off diag */ 5428 ontotalcols += nlcols[i*2+1]; 5429 } 5430 /* We do not need to figure the right number of columns 5431 * since all the 
calculations will be done by going through the raw data 5432 * */ 5433 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5434 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5435 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5436 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5437 /* diag */ 5438 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5439 /* off diag */ 5440 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5441 /* diag */ 5442 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5443 /* off diag */ 5444 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5445 dntotalcols = 0; 5446 ontotalcols = 0; 5447 ntotalcols = 0; 5448 for (i=0;i<nrows;i++) { 5449 owner = 0; 5450 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5451 /* Set iremote for diag matrix */ 5452 for (j=0;j<nlcols[i*2+0];j++) { 5453 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5454 iremote[dntotalcols].rank = owner; 5455 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5456 ilocal[dntotalcols++] = ntotalcols++; 5457 } 5458 /* off diag */ 5459 for (j=0;j<nlcols[i*2+1];j++) { 5460 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5461 oiremote[ontotalcols].rank = owner; 5462 oilocal[ontotalcols++] = ntotalcols++; 5463 } 5464 } 5465 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5466 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5467 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5468 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5469 /* P serves as roots and P_oth is leaves 5470 * Diag matrix 5471 * */ 5472 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5473 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5474 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5475 5476 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5477 /* Off diag */ 5478 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5479 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5480 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5481 /* We operate on the matrix internal data for saving memory */ 5482 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5483 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5484 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5485 /* Convert to global indices for diag matrix */ 5486 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5487 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5488 /* We want P_oth store global indices */ 5489 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5490 /* Use memory scalable approach */ 5491 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5492 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5493 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5494 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5495 /* Convert back to local indices */ 5496 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5497 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5498 nout = 0; 5499 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5500 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D 
\n",po->i[plocalsize],nout); 5501 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5502 /* Exchange values */ 5503 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5504 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5505 /* Stop PETSc from shrinking memory */ 5506 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5507 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5508 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5509 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5510 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5511 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5512 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5513 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5514 PetscFunctionReturn(0); 5515 } 5516 5517 /* 5518 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5519 * This supports MPIAIJ and MAIJ 5520 * */ 5521 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5522 { 5523 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5524 Mat_SeqAIJ *p_oth; 5525 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5526 IS rows,map; 5527 PetscHMapI hamp; 5528 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5529 MPI_Comm comm; 5530 PetscSF sf,osf; 5531 PetscBool has; 5532 PetscErrorCode ierr; 5533 5534 PetscFunctionBegin; 5535 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5536 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5537 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5538 * and then create a submatrix (that often is an overlapping matrix) 5539 * */ 5540 if (reuse == MAT_INITIAL_MATRIX) { 5541 /* Use a hash table to figure out unique keys */ 5542 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5543 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5544 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5545 count = 0; 5546 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5547 for (i=0;i<a->B->cmap->n;i++) { 5548 key = a->garray[i]/dof; 5549 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5550 if (!has) { 5551 mapping[i] = count; 5552 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5553 } else { 5554 /* Current 'i' has the same value the previous step */ 5555 mapping[i] = count-1; 5556 } 5557 } 5558 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5559 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5560 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5561 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5562 off = 0; 5563 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5564 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5565 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5566 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5567 /* In case, the matrix was already created but users want to recreate the matrix */ 5568 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5569 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5570 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5571 ierr = 
ISDestroy(&map);CHKERRQ(ierr); 5572 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5573 } else if (reuse == MAT_REUSE_MATRIX) { 5574 /* If matrix was already created, we simply update values using SF objects 5575 * that as attached to the matrix ealier. 5576 * */ 5577 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5578 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5579 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5580 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5581 /* Update values in place */ 5582 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5583 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5584 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5585 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5586 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5587 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5588 PetscFunctionReturn(0); 5589 } 5590 5591 /*@C 5592 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5593 5594 Collective on Mat 5595 5596 Input Parameters: 5597 + A,B - the matrices in mpiaij format 5598 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5599 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5600 5601 Output Parameter: 5602 + rowb, colb - index sets of rows and columns of B to extract 5603 - B_seq - the sequential matrix generated 5604 5605 Level: developer 5606 5607 @*/ 5608 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5609 { 5610 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5611 PetscErrorCode ierr; 5612 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5613 IS isrowb,iscolb; 5614 Mat *bseq=NULL; 5615 5616 PetscFunctionBegin; 5617 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5618 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5619 } 5620 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5621 5622 if (scall == MAT_INITIAL_MATRIX) { 5623 start = A->cmap->rstart; 5624 cmap = a->garray; 5625 nzA = a->A->cmap->n; 5626 nzB = a->B->cmap->n; 5627 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5628 ncols = 0; 5629 for (i=0; i<nzB; i++) { /* row < local row index */ 5630 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5631 else break; 5632 } 5633 imark = i; 5634 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5635 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5636 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5637 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5638 } else { 5639 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5640 isrowb = *rowb; iscolb = *colb; 5641 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5642 bseq[0] = *B_seq; 5643 } 5644 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5645 *B_seq = bseq[0]; 5646 ierr = PetscFree(bseq);CHKERRQ(ierr); 5647 if (!rowb) { 5648 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5649 } else { 5650 *rowb = isrowb; 5651 } 5652 if (!colb) { 5653 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5654 } 
else { 5655 *colb = iscolb; 5656 } 5657 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5658 PetscFunctionReturn(0); 5659 } 5660 5661 /* 5662 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5663 of the OFF-DIAGONAL portion of local A 5664 5665 Collective on Mat 5666 5667 Input Parameters: 5668 + A,B - the matrices in mpiaij format 5669 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5670 5671 Output Parameter: 5672 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5673 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5674 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5675 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5676 5677 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5678 for this matrix. This is not desirable.. 5679 5680 Level: developer 5681 5682 */ 5683 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5684 { 5685 PetscErrorCode ierr; 5686 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5687 Mat_SeqAIJ *b_oth; 5688 VecScatter ctx; 5689 MPI_Comm comm; 5690 const PetscMPIInt *rprocs,*sprocs; 5691 const PetscInt *srow,*rstarts,*sstarts; 5692 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5693 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5694 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5695 MPI_Request *rwaits = NULL,*swaits = NULL; 5696 MPI_Status rstatus; 5697 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5698 5699 PetscFunctionBegin; 5700 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5701 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5702 5703 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5704 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5705 } 5706 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5707 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5708 5709 if (size == 1) { 5710 startsj_s = NULL; 5711 bufa_ptr = NULL; 5712 *B_oth = NULL; 5713 PetscFunctionReturn(0); 5714 } 5715 5716 ctx = a->Mvctx; 5717 tag = ((PetscObject)ctx)->tag; 5718 5719 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5720 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5721 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5722 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5723 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5724 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5725 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5726 5727 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5728 if (scall == MAT_INITIAL_MATRIX) { 5729 /* i-array */ 5730 /*---------*/ 5731 /* post receives */ 5732 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be 
NULL when nrecvs=0 */ 5733 for (i=0; i<nrecvs; i++) { 5734 rowlen = rvalues + rstarts[i]*rbs; 5735 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5736 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5737 } 5738 5739 /* pack the outgoing message */ 5740 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5741 5742 sstartsj[0] = 0; 5743 rstartsj[0] = 0; 5744 len = 0; /* total length of j or a array to be sent */ 5745 if (nsends) { 5746 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5747 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5748 } 5749 for (i=0; i<nsends; i++) { 5750 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5751 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5752 for (j=0; j<nrows; j++) { 5753 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5754 for (l=0; l<sbs; l++) { 5755 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5756 5757 rowlen[j*sbs+l] = ncols; 5758 5759 len += ncols; 5760 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5761 } 5762 k++; 5763 } 5764 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5765 5766 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5767 } 5768 /* recvs and sends of i-array are completed */ 5769 i = nrecvs; 5770 while (i--) { 5771 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5772 } 5773 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5774 ierr = PetscFree(svalues);CHKERRQ(ierr); 5775 5776 /* allocate buffers for sending j and a arrays */ 5777 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5778 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5779 5780 /* create i-array of B_oth */ 5781 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5782 5783 b_othi[0] = 0; 5784 len = 0; /* total length of j or a array to be received */ 5785 k = 0; 5786 for (i=0; i<nrecvs; i++) { 5787 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5788 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5789 for (j=0; j<nrows; j++) { 5790 b_othi[k+1] = b_othi[k] + rowlen[j]; 5791 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5792 k++; 5793 } 5794 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5795 } 5796 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5797 5798 /* allocate space for j and a arrrays of B_oth */ 5799 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5800 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5801 5802 /* j-array */ 5803 /*---------*/ 5804 /* post receives of j-array */ 5805 for (i=0; i<nrecvs; i++) { 5806 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5807 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5808 } 5809 5810 /* pack the outgoing message j-array */ 5811 if (nsends) k = sstarts[0]; 5812 for (i=0; i<nsends; i++) { 5813 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5814 bufJ = bufj+sstartsj[i]; 5815 for (j=0; j<nrows; j++) { 5816 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5817 for (ll=0; ll<sbs; ll++) { 5818 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5819 for (l=0; l<ncols; l++) { 5820 *bufJ++ = cols[l]; 5821 } 5822 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 
5823 } 5824 } 5825 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5826 } 5827 5828 /* recvs and sends of j-array are completed */ 5829 i = nrecvs; 5830 while (i--) { 5831 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5832 } 5833 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5834 } else if (scall == MAT_REUSE_MATRIX) { 5835 sstartsj = *startsj_s; 5836 rstartsj = *startsj_r; 5837 bufa = *bufa_ptr; 5838 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5839 b_otha = b_oth->a; 5840 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 5841 5842 /* a-array */ 5843 /*---------*/ 5844 /* post receives of a-array */ 5845 for (i=0; i<nrecvs; i++) { 5846 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5847 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5848 } 5849 5850 /* pack the outgoing message a-array */ 5851 if (nsends) k = sstarts[0]; 5852 for (i=0; i<nsends; i++) { 5853 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5854 bufA = bufa+sstartsj[i]; 5855 for (j=0; j<nrows; j++) { 5856 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5857 for (ll=0; ll<sbs; ll++) { 5858 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5859 for (l=0; l<ncols; l++) { 5860 *bufA++ = vals[l]; 5861 } 5862 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5863 } 5864 } 5865 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5866 } 5867 /* recvs and sends of a-array are completed */ 5868 i = nrecvs; 5869 while (i--) { 5870 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5871 } 5872 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5873 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5874 5875 if (scall == MAT_INITIAL_MATRIX) { 5876 /* put together the new matrix */ 5877 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5878 5879 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5880 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5881 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5882 b_oth->free_a = PETSC_TRUE; 5883 b_oth->free_ij = PETSC_TRUE; 5884 b_oth->nonew = 0; 5885 5886 ierr = PetscFree(bufj);CHKERRQ(ierr); 5887 if (!startsj_s || !bufa_ptr) { 5888 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5889 ierr = PetscFree(bufa);CHKERRQ(ierr); 5890 } else { 5891 *startsj_s = sstartsj; 5892 *startsj_r = rstartsj; 5893 *bufa_ptr = bufa; 5894 } 5895 } 5896 5897 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5898 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5899 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5900 PetscFunctionReturn(0); 5901 } 5902 5903 /*@C 5904 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5905 5906 Not Collective 5907 5908 Input Parameter: 5909 . A - The matrix in mpiaij format 5910 5911 Output Parameters: 5912 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5913 .
colmap - A map from global column index to local index into lvec 5914 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5915 5916 Level: developer 5917 5918 @*/ 5919 #if defined(PETSC_USE_CTABLE) 5920 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5921 #else 5922 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5923 #endif 5924 { 5925 Mat_MPIAIJ *a; 5926 5927 PetscFunctionBegin; 5928 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5929 PetscValidPointer(lvec, 2); 5930 PetscValidPointer(colmap, 3); 5931 PetscValidPointer(multScatter, 4); 5932 a = (Mat_MPIAIJ*) A->data; 5933 if (lvec) *lvec = a->lvec; 5934 if (colmap) *colmap = a->colmap; 5935 if (multScatter) *multScatter = a->Mvctx; 5936 PetscFunctionReturn(0); 5937 } 5938 5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5940 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5941 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5942 #if defined(PETSC_HAVE_MKL_SPARSE) 5943 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5944 #endif 5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5946 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5947 #if defined(PETSC_HAVE_ELEMENTAL) 5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5949 #endif 5950 #if defined(PETSC_HAVE_SCALAPACK) 5951 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5952 #endif 5953 #if defined(PETSC_HAVE_HYPRE) 5954 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5955 #endif 5956 #if defined(PETSC_HAVE_CUDA) 5957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5958 #endif 5959 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5961 #endif 5962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5963 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5964 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5965 5966 /* 5967 Computes (B'*A')' since computing B*A directly is untenable 5968 5969 n p p 5970 [ ] [ ] [ ] 5971 m [ A ] * n [ B ] = m [ C ] 5972 [ ] [ ] [ ] 5973 5974 */ 5975 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5976 { 5977 PetscErrorCode ierr; 5978 Mat At,Bt,Ct; 5979 5980 PetscFunctionBegin; 5981 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5982 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5983 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5984 ierr = MatDestroy(&At);CHKERRQ(ierr); 5985 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5986 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5987 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5988 PetscFunctionReturn(0); 5989 } 5990 5991 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5992 { 5993 PetscErrorCode ierr; 5994 PetscBool cisdense; 5995 5996 PetscFunctionBegin; 5997 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5998 ierr = 
MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5999 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 6000 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 6001 if (!cisdense) { 6002 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6003 } 6004 ierr = MatSetUp(C);CHKERRQ(ierr); 6005 6006 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6007 PetscFunctionReturn(0); 6008 } 6009 6010 /* ----------------------------------------------------------------*/ 6011 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6012 { 6013 Mat_Product *product = C->product; 6014 Mat A = product->A,B=product->B; 6015 6016 PetscFunctionBegin; 6017 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6018 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6019 6020 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6021 C->ops->productsymbolic = MatProductSymbolic_AB; 6022 PetscFunctionReturn(0); 6023 } 6024 6025 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6026 { 6027 PetscErrorCode ierr; 6028 Mat_Product *product = C->product; 6029 6030 PetscFunctionBegin; 6031 if (product->type == MATPRODUCT_AB) { 6032 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6033 } 6034 PetscFunctionReturn(0); 6035 } 6036 /* ----------------------------------------------------------------*/ 6037 6038 /*MC 6039 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6040 6041 Options Database Keys: 6042 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6043 6044 Level: beginner 6045 6046 Notes: 6047 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values; 6048 in this case the values associated with the rows and columns one passes in are set to zero 6049 in the matrix 6050 6051 MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no 6052 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6053
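 The following is an illustrative construction sequence (an editorial sketch; comm, M, N, d_nz and o_nz are placeholders, not values taken from this file):

.vb
  Mat A;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
  ... insert entries with MatSetValues() ...
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve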
6054 .seealso: MatCreateAIJ() 6055 M*/ 6056 6057 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6058 { 6059 Mat_MPIAIJ *b; 6060 PetscErrorCode ierr; 6061 PetscMPIInt size; 6062 6063 PetscFunctionBegin; 6064 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 6065 6066 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6067 B->data = (void*)b; 6068 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6069 B->assembled = PETSC_FALSE; 6070 B->insertmode = NOT_SET_VALUES; 6071 b->size = size; 6072 6073 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 6074 6075 /* build cache for off-processor entries formed */ 6076 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6077 6078 b->donotstash = PETSC_FALSE; 6079 b->colmap = NULL; 6080 b->garray = NULL; 6081 b->roworiented = PETSC_TRUE; 6082 6083 /* stuff used for matrix vector multiply */ 6084 b->lvec = NULL; 6085 b->Mvctx = NULL; 6086 6087 /* stuff for MatGetRow() */ 6088 b->rowindices = NULL; 6089 b->rowvalues = NULL; 6090 b->getrowactive = PETSC_FALSE; 6091 6092 /* flexible pointer used in CUSP/CUSPARSE classes */ 6093 b->spptr = NULL; 6094 6095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6096 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6105 #if defined(PETSC_HAVE_CUDA) 6106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6107 #endif 6108 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6110 #endif 6111 #if defined(PETSC_HAVE_MKL_SPARSE) 6112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6113 #endif 6114 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6116 ierr =
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6117 #if defined(PETSC_HAVE_ELEMENTAL) 6118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6119 #endif 6120 #if defined(PETSC_HAVE_SCALAPACK) 6121 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6122 #endif 6123 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6124 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6125 #if defined(PETSC_HAVE_HYPRE) 6126 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6127 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6128 #endif 6129 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6130 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6131 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6132 PetscFunctionReturn(0); 6133 } 6134 6135 /*@C 6136 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6137 and "off-diagonal" part of the matrix in CSR format. 6138 6139 Collective 6140 6141 Input Parameters: 6142 + comm - MPI communicator 6143 . m - number of local rows (Cannot be PETSC_DECIDE) 6144 . n - This value should be the same as the local size used in creating the 6145 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6146 calculated if N is given) For square matrices n is almost always m. 6147 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6148 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6149 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6150 . j - column indices 6151 . a - matrix values 6152 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6153 . oj - column indices 6154 - oa - matrix values 6155 6156 Output Parameter: 6157 . mat - the matrix 6158 6159 Level: advanced 6160 6161 Notes: 6162 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6163 must free the arrays once the matrix has been destroyed and not before. 6164 6165 The i and j indices are 0 based 6166 6167 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6168 6169 This sets local rows and cannot be used to set off-processor values. 6170 6171 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6172 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6173 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6174 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6175 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6176 communication if it is known that only local entries will be set. 6177 6178 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6179 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6180 @*/ 6181 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6182 { 6183 PetscErrorCode ierr; 6184 Mat_MPIAIJ *maij; 6185 6186 PetscFunctionBegin; 6187 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6188 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6189 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6190 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6191 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6192 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6193 maij = (Mat_MPIAIJ*) (*mat)->data; 6194 6195 (*mat)->preallocated = PETSC_TRUE; 6196 6197 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6198 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6199 6200 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6201 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6202 6203 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6204 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6205 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6206 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6207 6208 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6209 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6210 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6211 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6212 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6213 PetscFunctionReturn(0); 6214 } 6215 6216 /* 6217 Special version for direct calls from Fortran 6218 */ 6219 #include <petsc/private/fortranimpl.h> 6220 6221 /* Change these macros so can be used in void function */ 6222 #undef CHKERRQ 6223 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6224 #undef SETERRQ2 6225 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6226 #undef SETERRQ3 6227 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6228 #undef SETERRQ 6229 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6230 6231 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6232 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6233 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6234 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6235 #else 6236 #endif 6237 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6238 { 6239 Mat mat = *mmat; 6240 PetscInt m = *mm, n = *mn; 6241 InsertMode addv = *maddv; 6242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6243 PetscScalar value; 6244 
PetscErrorCode ierr; 6245 6246 MatCheckPreallocated(mat,1); 6247 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6248 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6249 { 6250 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6251 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6252 PetscBool roworiented = aij->roworiented; 6253 6254 /* Some Variables required in the macro */ 6255 Mat A = aij->A; 6256 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6257 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6258 MatScalar *aa = a->a; 6259 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6260 Mat B = aij->B; 6261 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6262 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6263 MatScalar *ba = b->a; 6264 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6265 * cannot use "#if defined" inside a macro. */ 6266 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6267 6268 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6269 PetscInt nonew = a->nonew; 6270 MatScalar *ap1,*ap2; 6271 6272 PetscFunctionBegin; 6273 for (i=0; i<m; i++) { 6274 if (im[i] < 0) continue; 6275 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6276 if (im[i] >= rstart && im[i] < rend) { 6277 row = im[i] - rstart; 6278 lastcol1 = -1; 6279 rp1 = aj + ai[row]; 6280 ap1 = aa + ai[row]; 6281 rmax1 = aimax[row]; 6282 nrow1 = ailen[row]; 6283 low1 = 0; 6284 high1 = nrow1; 6285 lastcol2 = -1; 6286 rp2 = bj + bi[row]; 6287 ap2 = ba + bi[row]; 6288 rmax2 = bimax[row]; 6289 nrow2 = bilen[row]; 6290 low2 = 0; 6291 high2 = nrow2; 6292 6293 for (j=0; j<n; j++) { 6294 if (roworiented) value = v[i*n+j]; 6295 else value = v[i+j*m]; 6296 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6297 if (in[j] >= cstart && in[j] < cend) { 6298 col = in[j] - cstart; 6299 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6300 #if defined(PETSC_HAVE_DEVICE) 6301 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6302 #endif 6303 } else if (in[j] < 0) continue; 6304 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6305 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6306 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6307 } else { 6308 if (mat->was_assembled) { 6309 if (!aij->colmap) { 6310 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6311 } 6312 #if defined(PETSC_USE_CTABLE) 6313 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6314 col--; 6315 #else 6316 col = aij->colmap[in[j]] - 1; 6317 #endif 6318 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6319 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6320 col = in[j]; 6321 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6322 B = aij->B; 6323 b = (Mat_SeqAIJ*)B->data; 6324 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6325 rp2 = bj + bi[row]; 6326 ap2 = ba + bi[row]; 6327 rmax2 = bimax[row]; 6328 nrow2 = bilen[row]; 6329 low2 = 
0; 6330 high2 = nrow2; 6331 bm = aij->B->rmap->n; 6332 ba = b->a; 6333 inserted = PETSC_FALSE; 6334 } 6335 } else col = in[j]; 6336 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6337 #if defined(PETSC_HAVE_DEVICE) 6338 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6339 #endif 6340 } 6341 } 6342 } else if (!aij->donotstash) { 6343 if (roworiented) { 6344 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6345 } else { 6346 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6347 } 6348 } 6349 } 6350 } 6351 PetscFunctionReturnVoid(); 6352 } 6353
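/*
   Illustrative call shape for MatCreateMPIAIJWithSplitArrays() documented earlier in this file
   (an editorial sketch; m, n and the six CSR arrays di, dj, da, oi, oj, oa are placeholders that the
   caller must fill with its own data and, as the manual page notes, keep allocated until the matrix
   has been destroyed):

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
     .... use A ....
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     .... only now may di, dj, da, oi, oj and oa be freed ....
*/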