1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise. As a result, for single process communicators,
14    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15    for communicators controlling multiple processes. It is recommended that you call both of
16    the above preallocation routines for simplicity. (An illustrative usage sketch is given in a comment below.)
17
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20
21    Developer Notes:
22    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to using
23    inodes when enough of them exist.
24
25    Level: beginner
26
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise. As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36    for communicators controlling multiple processes. It is recommended that you call both of
37    the above preallocation routines for simplicity.
38
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41
42    Level: beginner
43
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64
65
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89
90   PetscFunctionBegin;
91   *keptrows = NULL;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j<nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110     ok1:;
111   }
112   ierr =
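/*
   Illustrative usage sketch only (not part of this implementation), as recommended in the
   MATAIJ manual page above: create a matrix of type MATAIJ and call both preallocation
   routines so the same code works on one or many processes.  The global sizes M, N and the
   per-row nonzero estimates (5 diagonal, 2 off-diagonal) are hypothetical placeholders.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          (used when the communicator has one process)
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   (used when it has several)
     ... MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd() as usual ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/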
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=NULL; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + 
rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = 
MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419
420 /*
421   Local utility routine that creates a mapping from the global column
422   number to the local number in the off-diagonal part of the local
423   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable, at
424   a slightly higher hash-table cost; without it, it is not scalable (each process
425   stores an order-N integer array) but access is fast.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
449 { \
450     if (col <= lastcol1) low1 = 0; \
451     else high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else low1 = t; \
457     } \
458     for (_i=low1; _i<high1; _i++) { \
459       if (rp1[_i] > col) break; \
460       if (rp1[_i] == col) { \
461         if (addv == ADD_VALUES) { \
462           ap1[_i] += value; \
463           /* Not sure whether LogFlops will slow down the code or not */ \
464           (void)PetscLogFlops(1.0); \
465         } \
466         else ap1[_i] = value; \
467         inserted = PETSC_TRUE; \
468         goto a_noinsert; \
469       } \
470     } \
471     if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472     if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
473     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474     MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475     N = nrow1++ - 1; a->nz++; high1++; \
476     /* shift up all the later entries in this row */ \
477     ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478     ierr =
PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 479 rp1[_i] = col; \ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_DEVICE) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 
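    /* The locals declared in this function alias the internals of the sequential diagonal block
       aij->A and off-diagonal block aij->B so that the MatSetValues_SeqAIJ_A_Private() and
       MatSetValues_SeqAIJ_B_Private() macros above can insert directly into them: each macro
       narrows the search range with a short bisection over the sorted column indices of the row,
       adds to or overwrites an existing entry if the column is found, and otherwise shifts the
       tail of the row up (reallocating through MatSeqXAIJReallocateAIJ() when the row is full)
       before writing the new column index and value, honoring the nonew and ignorezeroentries
       options. */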
570 MatScalar *ba = b->a; 571 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 583 if (im[i] >= rstart && im[i] < rend) { 584 row = im[i] - rstart; 585 lastcol1 = -1; 586 rp1 = aj + ai[row]; 587 ap1 = aa + ai[row]; 588 rmax1 = aimax[row]; 589 nrow1 = ailen[row]; 590 low1 = 0; 591 high1 = nrow1; 592 lastcol2 = -1; 593 rp2 = bj + bi[row]; 594 ap2 = ba + bi[row]; 595 rmax2 = bimax[row]; 596 nrow2 = bilen[row]; 597 low2 = 0; 598 high2 = nrow2; 599 600 for (j=0; j<n; j++) { 601 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 602 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 603 if (in[j] >= cstart && in[j] < cend) { 604 col = in[j] - cstart; 605 nonew = a->nonew; 606 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 607 #if defined(PETSC_HAVE_DEVICE) 608 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 609 #endif 610 } else if (in[j] < 0) continue; 611 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 612 else { 613 if (mat->was_assembled) { 614 if (!aij->colmap) { 615 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 616 } 617 #if defined(PETSC_USE_CTABLE) 618 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 619 col--; 620 #else 621 col = aij->colmap[in[j]] - 1; 622 #endif 623 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 624 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 625 col = in[j]; 626 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 627 B = aij->B; 628 b = (Mat_SeqAIJ*)B->data; 629 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 630 rp2 = bj + bi[row]; 631 ap2 = ba + bi[row]; 632 rmax2 = bimax[row]; 633 nrow2 = bilen[row]; 634 low2 = 0; 635 high2 = nrow2; 636 bm = aij->B->rmap->n; 637 ba = b->a; 638 inserted = PETSC_FALSE; 639 } else if (col < 0) { 640 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 641 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 642 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 643 } 644 } else col = in[j]; 645 nonew = b->nonew; 646 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 647 #if defined(PETSC_HAVE_DEVICE) 648 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 649 #endif 650 } 651 } 652 } else { 653 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 654 if (!aij->donotstash) { 655 mat->assembled = PETSC_FALSE; 656 if (roworiented) { 657 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && 
(addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
671 */
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
675   Mat        A      = aij->A; /* diagonal part of the matrix */
676   Mat        B      = aij->B; /* off-diagonal part of the matrix */
677   Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
679   PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt   *ailen = a->ilen,*aj = a->j;
681   PetscInt   *bilen = b->ilen,*bj = b->j;
682   PetscInt   am     = aij->A->rmap->n,j;
683   PetscInt   diag_so_far = 0,dnz;
684   PetscInt   offd_so_far = 0,onz;
685
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
711     Also, mat->was_assembled has to be false, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat        A      = aij->A; /* diagonal part of the matrix */
718   Mat        B      = aij->B; /* off-diagonal part of the matrix */
719   Mat_SeqAIJ *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt   *ailen = a->ilen,*aj = a->j;
724   PetscInt   *bilen = b->ilen,*bj = b->j;
725   PetscInt   am     = aij->A->rmap->n,j;
726   PetscInt   *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 727 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 728 PetscScalar *aa = a->a,*ba = b->a; 729 730 PetscFunctionBegin; 731 /* Iterate over all rows of the matrix */ 732 for (j=0; j<am; j++) { 733 dnz_row = onz_row = 0; 734 rowstart_offd = full_offd_i[j]; 735 rowstart_diag = full_diag_i[j]; 736 /* Iterate over all non-zero columns of the current row */ 737 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 738 /* If column is in the diagonal */ 739 if (mat_j[col] >= cstart && mat_j[col] < cend) { 740 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 741 aa[rowstart_diag+dnz_row] = mat_a[col]; 742 dnz_row++; 743 } else { /* off-diagonal entries */ 744 bj[rowstart_offd+onz_row] = mat_j[col]; 745 ba[rowstart_offd+onz_row] = mat_a[col]; 746 onz_row++; 747 } 748 } 749 ailen[j] = dnz_row; 750 bilen[j] = onz_row; 751 } 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 756 { 757 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 758 PetscErrorCode ierr; 759 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 760 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 761 762 PetscFunctionBegin; 763 for (i=0; i<m; i++) { 764 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 765 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 766 if (idxm[i] >= rstart && idxm[i] < rend) { 767 row = idxm[i] - rstart; 768 for (j=0; j<n; j++) { 769 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 770 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 771 if (idxn[j] >= cstart && idxn[j] < cend) { 772 col = idxn[j] - cstart; 773 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 774 } else { 775 if (!aij->colmap) { 776 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 777 } 778 #if defined(PETSC_USE_CTABLE) 779 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 780 col--; 781 #else 782 col = aij->colmap[idxn[j]] - 1; 783 #endif 784 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 785 else { 786 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 787 } 788 } 789 } 790 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 791 } 792 PetscFunctionReturn(0); 793 } 794 795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 796 797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 798 { 799 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 800 PetscErrorCode ierr; 801 PetscInt nstash,reallocs; 802 803 PetscFunctionBegin; 804 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 805 806 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 807 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 808 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 813 { 814 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 816 PetscErrorCode ierr; 817 PetscMPIInt n; 818 PetscInt i,j,rstart,ncols,flg; 819 PetscInt *row,*col; 820 
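  /*
     Caller-side sketch (illustrative only; grow, gcol and val are hypothetical): entries set for
     rows owned by another process are stored in the stash by MatSetValues_MPIAIJ(), communicated
     when MatAssemblyBegin_MPIAIJ() is called, and received and inserted below before the diagonal
     (A) and off-diagonal (B) blocks are themselves assembled.

       ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  */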
PetscBool other_disassembled; 821 PetscScalar *val; 822 823 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 824 825 PetscFunctionBegin; 826 if (!aij->donotstash && !mat->nooffprocentries) { 827 while (1) { 828 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 829 if (!flg) break; 830 831 for (i=0; i<n;) { 832 /* Now identify the consecutive vals belonging to the same row */ 833 for (j=i,rstart=row[j]; j<n; j++) { 834 if (row[j] != rstart) break; 835 } 836 if (j < n) ncols = j-i; 837 else ncols = n-i; 838 /* Now assemble all these values with a single function call */ 839 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 840 i = j; 841 } 842 } 843 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 844 } 845 #if defined(PETSC_HAVE_DEVICE) 846 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 847 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 848 if (mat->boundtocpu) { 849 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 850 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 851 } 852 #endif 853 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 854 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 855 856 /* determine if any processor has disassembled, if so we must 857 also disassemble ourself, in order that we may reassemble. */ 858 /* 859 if nonzero structure of submatrix B cannot change then we know that 860 no processor disassembled thus we can skip this stuff 861 */ 862 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 863 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 864 if (mat->was_assembled && !other_disassembled) { 865 #if defined(PETSC_HAVE_DEVICE) 866 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 867 #endif 868 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 869 } 870 } 871 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 872 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 873 } 874 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 875 #if defined(PETSC_HAVE_DEVICE) 876 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 877 #endif 878 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 879 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 880 881 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 882 883 aij->rowvalues = NULL; 884 885 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 886 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 887 888 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 889 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 890 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 891 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 892 } 893 #if defined(PETSC_HAVE_DEVICE) 894 mat->offloadmask = PETSC_OFFLOAD_BOTH; 895 #endif 896 PetscFunctionReturn(0); 897 } 898 899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 900 { 901 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 
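  /* The locally owned rows are stored as a "diagonal" block A and an "off-diagonal" block B,
     so zeroing both sequential blocks zeroes every locally owned entry of the parallel matrix */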
ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 906 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 911 { 912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 913 PetscObjectState sA, sB; 914 PetscInt *lrows; 915 PetscInt r, len; 916 PetscBool cong, lch, gch; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 /* get locally owned rows */ 921 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 922 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 923 /* fix right hand side if needed */ 924 if (x && b) { 925 const PetscScalar *xx; 926 PetscScalar *bb; 927 928 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 929 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 930 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 931 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 932 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 933 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 934 } 935 936 sA = mat->A->nonzerostate; 937 sB = mat->B->nonzerostate; 938 939 if (diag != 0.0 && cong) { 940 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 941 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 942 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 943 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 944 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 945 PetscInt nnwA, nnwB; 946 PetscBool nnzA, nnzB; 947 948 nnwA = aijA->nonew; 949 nnwB = aijB->nonew; 950 nnzA = aijA->keepnonzeropattern; 951 nnzB = aijB->keepnonzeropattern; 952 if (!nnzA) { 953 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 954 aijA->nonew = 0; 955 } 956 if (!nnzB) { 957 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 958 aijB->nonew = 0; 959 } 960 /* Must zero here before the next loop */ 961 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 962 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 963 for (r = 0; r < len; ++r) { 964 const PetscInt row = lrows[r] + A->rmap->rstart; 965 if (row >= A->cmap->N) continue; 966 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 967 } 968 aijA->nonew = nnwA; 969 aijB->nonew = nnwB; 970 } else { 971 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 972 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 973 } 974 ierr = PetscFree(lrows);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 978 /* reduce nonzerostate */ 979 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 980 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 981 if (gch) A->nonzerostate++; 982 PetscFunctionReturn(0); 983 } 984 985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 986 { 987 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 988 PetscErrorCode ierr; 989 PetscMPIInt n = A->rmap->n; 990 PetscInt i,j,r,m,len = 0; 991 PetscInt *lrows,*owners = A->rmap->range; 992 PetscMPIInt p 
= 0; 993 PetscSFNode *rrows; 994 PetscSF sf; 995 const PetscScalar *xx; 996 PetscScalar *bb,*mask; 997 Vec xmask,lmask; 998 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 999 const PetscInt *aj, *ii,*ridx; 1000 PetscScalar *aa; 1001 1002 PetscFunctionBegin; 1003 /* Create SF where leaves are input rows and roots are owned rows */ 1004 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1005 for (r = 0; r < n; ++r) lrows[r] = -1; 1006 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1007 for (r = 0; r < N; ++r) { 1008 const PetscInt idx = rows[r]; 1009 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1010 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1011 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1012 } 1013 rrows[r].rank = p; 1014 rrows[r].index = rows[r] - owners[p]; 1015 } 1016 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1017 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1018 /* Collect flags for rows to be zeroed */ 1019 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1020 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1021 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1022 /* Compress and put in row numbers */ 1023 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1024 /* zero diagonal part of matrix */ 1025 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1026 /* handle off diagonal part of matrix */ 1027 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1028 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1029 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1030 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1031 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1032 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1035 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1036 PetscBool cong; 1037 1038 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1039 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1040 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1043 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1044 } 1045 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1046 /* remove zeroed rows of off diagonal matrix */ 1047 ii = aij->i; 1048 for (i=0; i<len; i++) { 1049 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1050 } 1051 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1052 if (aij->compressedrow.use) { 1053 m = aij->compressedrow.nrows; 1054 ii = aij->compressedrow.i; 1055 ridx = aij->compressedrow.rindex; 1056 for (i=0; i<m; i++) { 1057 n = ii[i+1] - ii[i]; 1058 aj = aij->j + ii[i]; 1059 aa = aij->a + ii[i]; 1060 1061 for (j=0; j<n; j++) { 1062 if (PetscAbsScalar(mask[*aj])) { 1063 if (b) bb[*ridx] -= *aa*xx[*aj]; 1064 *aa = 0.0; 1065 } 1066 aa++; 1067 aj++; 1068 } 1069 ridx++; 1070 } 1071 } else { /* do not use compressed row format */ 1072 m = 
l->B->rmap->n; 1073 for (i=0; i<m; i++) { 1074 n = ii[i+1] - ii[i]; 1075 aj = aij->j + ii[i]; 1076 aa = aij->a + ii[i]; 1077 for (j=0; j<n; j++) { 1078 if (PetscAbsScalar(mask[*aj])) { 1079 if (b) bb[i] -= *aa*xx[*aj]; 1080 *aa = 0.0; 1081 } 1082 aa++; 1083 aj++; 1084 } 1085 } 1086 } 1087 if (x && b) { 1088 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1089 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1090 } 1091 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1092 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1093 ierr = PetscFree(lrows);CHKERRQ(ierr); 1094 1095 /* only change matrix nonzero state if pattern was allowed to be changed */ 1096 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1097 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1098 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 PetscInt nt; 1108 VecScatter Mvctx = a->Mvctx; 1109 1110 PetscFunctionBegin; 1111 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1112 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1113 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1115 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1116 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 VecScatter Mvctx = a->Mvctx; 1135 1136 PetscFunctionBegin; 1137 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1138 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 /* do nondiagonal part */ 1152 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1153 /* do local part */ 1154 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1155 /* add partial results together */ 1156 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1162 { 1163 MPI_Comm comm; 1164 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1165 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1166 IS Me,Notme; 1167 PetscErrorCode ierr; 1168 PetscInt M,N,first,last,*notme,i; 1169 PetscBool lf; 1170 
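  /*
     Illustrative caller-side sketch only (the tolerance is a hypothetical value):

       PetscBool flg;
       ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);    checks whether B equals the transpose of A
       ierr = MatIsSymmetric(A,1.e-10,&flg);CHKERRQ(ierr);      the same test with B = A, see MatIsSymmetric_MPIAIJ() below
  */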
PetscMPIInt size; 1171 1172 PetscFunctionBegin; 1173 /* Easy test: symmetric diagonal block */ 1174 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1175 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1176 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1177 if (!*f) PetscFunctionReturn(0); 1178 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1179 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1180 if (size == 1) PetscFunctionReturn(0); 1181 1182 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1183 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1184 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1185 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1186 for (i=0; i<first; i++) notme[i] = i; 1187 for (i=last; i<M; i++) notme[i-last+first] = i; 1188 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1189 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1190 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1191 Aoff = Aoffs[0]; 1192 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1193 Boff = Boffs[0]; 1194 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1195 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1196 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1197 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1198 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1199 ierr = PetscFree(notme);CHKERRQ(ierr); 1200 PetscFunctionReturn(0); 1201 } 1202 1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1204 { 1205 PetscErrorCode ierr; 1206 1207 PetscFunctionBegin; 1208 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1213 { 1214 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 /* do nondiagonal part */ 1219 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1220 /* do local part */ 1221 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1222 /* add partial results together */ 1223 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 /* 1229 This only works correctly for square matrices where the subblock A->A is the 1230 diagonal block 1231 */ 1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1233 { 1234 PetscErrorCode ierr; 1235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1236 1237 PetscFunctionBegin; 1238 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1239 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1240 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1245 { 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1251 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1256 { 1257 Mat_MPIAIJ 
*aij = (Mat_MPIAIJ*)mat->data; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 #if defined(PETSC_USE_LOG) 1262 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1263 #endif 1264 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1265 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1266 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1267 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1268 #if defined(PETSC_USE_CTABLE) 1269 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1270 #else 1271 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1272 #endif 1273 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1274 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1275 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1276 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1277 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1278 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1279 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1280 1281 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1282 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1283 1284 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1294 #if defined(PETSC_HAVE_ELEMENTAL) 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1296 #endif 1297 #if defined(PETSC_HAVE_SCALAPACK) 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1299 #endif 1300 #if defined(PETSC_HAVE_HYPRE) 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1303 #endif 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1311 { 1312 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1313 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1314 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1315 const PetscInt *garray = aij->garray; 1316 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1317 PetscInt *rowlens; 1318 PetscInt *colidxs; 1319 PetscScalar *matvals; 1320 
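  /* Layout of the binary output produced below: a four-entry header
     (MAT_FILE_CLASSID, global rows M, global columns N, global nonzero count),
     followed by all row lengths, all global column indices, and all nonzero values;
     each process contributes its local rows, with the off-diagonal (B) entries merged
     around the diagonal (A) entries so that the columns of each row appear in
     increasing global order */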
PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1324 1325 M = mat->rmap->N; 1326 N = mat->cmap->N; 1327 m = mat->rmap->n; 1328 rs = mat->rmap->rstart; 1329 cs = mat->cmap->rstart; 1330 nz = A->nz + B->nz; 1331 1332 /* write matrix header */ 1333 header[0] = MAT_FILE_CLASSID; 1334 header[1] = M; header[2] = N; header[3] = nz; 1335 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1336 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1337 1338 /* fill in and store row lengths */ 1339 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1340 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1341 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1342 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1343 1344 /* fill in and store column indices */ 1345 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1346 for (cnt=0, i=0; i<m; i++) { 1347 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1348 if (garray[B->j[jb]] > cs) break; 1349 colidxs[cnt++] = garray[B->j[jb]]; 1350 } 1351 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1352 colidxs[cnt++] = A->j[ja] + cs; 1353 for (; jb<B->i[i+1]; jb++) 1354 colidxs[cnt++] = garray[B->j[jb]]; 1355 } 1356 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1357 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1358 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1359 1360 /* fill in and store nonzero values */ 1361 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1362 for (cnt=0, i=0; i<m; i++) { 1363 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1364 if (garray[B->j[jb]] > cs) break; 1365 matvals[cnt++] = B->a[jb]; 1366 } 1367 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1368 matvals[cnt++] = A->a[ja]; 1369 for (; jb<B->i[i+1]; jb++) 1370 matvals[cnt++] = B->a[jb]; 1371 } 1372 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1373 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1374 ierr = PetscFree(matvals);CHKERRQ(ierr); 1375 1376 /* write block size option to the viewer's .info file */ 1377 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #include <petscdraw.h> 1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1383 { 1384 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1385 PetscErrorCode ierr; 1386 PetscMPIInt rank = aij->rank,size = aij->size; 1387 PetscBool isdraw,iascii,isbinary; 1388 PetscViewer sviewer; 1389 PetscViewerFormat format; 1390 1391 PetscFunctionBegin; 1392 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1393 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1394 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1395 if (iascii) { 1396 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1397 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1398 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1399 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1400 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1401 for (i=0; i<(PetscInt)size; i++) { 
1402 nmax = PetscMax(nmax,nz[i]); 1403 nmin = PetscMin(nmin,nz[i]); 1404 navg += nz[i]; 1405 } 1406 ierr = PetscFree(nz);CHKERRQ(ierr); 1407 navg = navg/size; 1408 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1412 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1413 MatInfo info; 1414 PetscBool inodes; 1415 1416 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1417 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1418 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1420 if (!inodes) { 1421 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1422 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1423 } else { 1424 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1425 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1426 } 1427 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1429 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1430 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1431 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1432 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1433 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1434 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1437 PetscInt inodecount,inodelimit,*inodes; 1438 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1439 if (inodes) { 1440 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1441 } else { 1442 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1446 PetscFunctionReturn(0); 1447 } 1448 } else if (isbinary) { 1449 if (size == 1) { 1450 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1451 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1452 } else { 1453 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } else if (iascii && size == 1) { 1457 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1458 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } else if (isdraw) { 1461 PetscDraw draw; 1462 PetscBool isnull; 1463 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1464 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1465 if (isnull) PetscFunctionReturn(0); 1466 } 1467 1468 { /* assemble the entire matrix onto first processor */ 1469 Mat A = NULL, Av; 1470 IS isrow,iscol; 1471 1472 ierr = 
ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1473 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1474 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1475 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1476 /* The commented code uses MatCreateSubMatrices instead */ 1477 /* 1478 Mat *AA, A = NULL, Av; 1479 IS isrow,iscol; 1480 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1483 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1484 if (!rank) { 1485 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1486 A = AA[0]; 1487 Av = AA[0]; 1488 } 1489 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1490 */ 1491 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1492 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1493 /* 1494 Everyone has to call to draw the matrix since the graphics waits are 1495 synchronized across all processors that share the PetscDraw object 1496 */ 1497 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1498 if (!rank) { 1499 if (((PetscObject)mat)->name) { 1500 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1501 } 1502 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1503 } 1504 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1506 ierr = MatDestroy(&A);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1512 { 1513 PetscErrorCode ierr; 1514 PetscBool iascii,isdraw,issocket,isbinary; 1515 1516 PetscFunctionBegin; 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1519 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1521 if (iascii || isdraw || isbinary || issocket) { 1522 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1528 { 1529 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1530 PetscErrorCode ierr; 1531 Vec bb1 = NULL; 1532 PetscBool hasop; 1533 1534 PetscFunctionBegin; 1535 if (flag == SOR_APPLY_UPPER) { 1536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1541 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1542 } 1543 1544 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1545 if (flag & SOR_ZERO_INITIAL_GUESS) { 1546 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1547 its--; 1548 } 1549 1550 while (its--) { 1551 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 
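      /*
         At this point the ghost entries of the current iterate xx have been gathered into
         mat->lvec. The statements below form bb1 = bb - B*lvec (the off-process coupling is
         moved to the right-hand side) and then apply a symmetric SOR sweep to the local
         diagonal block A, so each outer iteration is, in effect, a processor-block Jacobi
         step that uses SOR as the subdomain solver.
      */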
1554 /* update rhs: bb1 = bb - B*x */ 1555 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1556 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1557 1558 /* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1560 } 1561 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1562 if (flag & SOR_ZERO_INITIAL_GUESS) { 1563 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1564 its--; 1565 } 1566 while (its--) { 1567 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* update rhs: bb1 = bb - B*x */ 1571 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1572 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1573 1574 /* local sweep */ 1575 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1576 } 1577 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1578 if (flag & SOR_ZERO_INITIAL_GUESS) { 1579 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1580 its--; 1581 } 1582 while (its--) { 1583 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 1586 /* update rhs: bb1 = bb - B*x */ 1587 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1588 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1589 1590 /* local sweep */ 1591 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1592 } 1593 } else if (flag & SOR_EISENSTAT) { 1594 Vec xx1; 1595 1596 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1598 1599 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 if (!mat->diag) { 1602 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1603 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1604 } 1605 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1606 if (hasop) { 1607 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1608 } else { 1609 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1610 } 1611 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1612 1613 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1617 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1618 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1619 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1620 1621 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1622 1623 matin->factorerrortype = mat->A->factorerrortype; 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1628 { 1629 Mat aA,aB,Aperm; 1630 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1631 PetscScalar *aa,*ba; 1632 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1633 PetscSF rowsf,sf; 1634 IS parcolp = NULL; 
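  /*
     Rough outline of the routine, as read from the code below: star forests built on the row
     and column layouts invert rowp and colp, so each process learns the destination row of its
     local rows and the destination column of its local and ghost columns; those inverted
     indices are used to count (and communicate) diagonal/off-diagonal nonzeros per row for the
     preallocation of the permuted matrix, and the entries are finally inserted with
     MatSetValues() and assembled.
  */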
1635 PetscBool done; 1636 PetscErrorCode ierr; 1637 1638 PetscFunctionBegin; 1639 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1640 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1641 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1642 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1643 1644 /* Invert row permutation to find out where my rows should go */ 1645 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1646 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1647 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1649 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1650 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1651 1652 /* Invert column permutation to find out where my columns should go */ 1653 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1654 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1655 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1656 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1657 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1658 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1660 1661 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1662 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1663 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1664 1665 /* Find out where my gcols should go */ 1666 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1667 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1668 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1669 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1670 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1671 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1672 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1674 1675 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1676 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1677 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1678 for (i=0; i<m; i++) { 1679 PetscInt row = rdest[i]; 1680 PetscMPIInt rowner; 1681 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1682 for (j=ai[i]; j<ai[i+1]; j++) { 1683 PetscInt col = cdest[aj[j]]; 1684 PetscMPIInt cowner; 1685 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1686 if (rowner == cowner) dnnz[i]++; 1687 else onnz[i]++; 1688 } 1689 for (j=bi[i]; j<bi[i+1]; j++) { 1690 PetscInt col = gcdest[bj[j]]; 1691 PetscMPIInt cowner; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 } 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1701 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1702 1703 ierr = 
MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1705 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1706 for (i=0; i<m; i++) { 1707 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1708 PetscInt j0,rowlen; 1709 rowlen = ai[i+1] - ai[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1711 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 rowlen = bi[i+1] - bi[i]; 1715 for (j0=j=0; j<rowlen; j0=j) { 1716 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1717 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1718 } 1719 } 1720 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1723 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1725 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1726 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1727 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1728 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1729 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1730 *B = Aperm; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1741 if (ghosts) *ghosts = aij->garray; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscLogDouble isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs 
= irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 MatCheckPreallocated(A,1); 1807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1808 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1809 break; 1810 case MAT_ROW_ORIENTED: 1811 MatCheckPreallocated(A,1); 1812 a->roworiented = flg; 1813 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 case MAT_SORTED_FULL: 1819 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1820 break; 1821 case MAT_IGNORE_OFF_PROC_ENTRIES: 1822 a->donotstash = flg; 1823 break; 1824 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1825 case MAT_SPD: 1826 case MAT_SYMMETRIC: 1827 case MAT_STRUCTURALLY_SYMMETRIC: 1828 case MAT_HERMITIAN: 1829 case MAT_SYMMETRY_ETERNAL: 1830 break; 1831 case MAT_SUBMAT_SINGLEIS: 1832 A->submat_singleis = flg; 1833 break; 1834 case MAT_STRUCTURE_ONLY: 1835 /* The option is handled directly by MatSetOption() */ 1836 break; 1837 default: 1838 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1839 } 1840 PetscFunctionReturn(0); 1841 } 1842 1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1844 { 1845 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1846 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1847 PetscErrorCode ierr; 1848 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1849 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1850 PetscInt *cmap,*idx_p; 1851 1852 PetscFunctionBegin; 1853 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1854 mat->getrowactive = PETSC_TRUE; 1855 1856 if (!mat->rowvalues && (idx || v)) { 1857 /* 1858 allocate enough space to hold information from the longest row. 
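       The bound is the largest combined number of stored entries in the diagonal (A) and
       off-diagonal (B) parts over all local rows, so one pair of work arrays can hold the
       values and global column indices of any requested row.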
1859 */ 1860 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1861 PetscInt max = 1,tmp; 1862 for (i=0; i<matin->rmap->n; i++) { 1863 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1864 if (max < tmp) max = tmp; 1865 } 1866 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1867 } 1868 1869 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1870 lrow = row - rstart; 1871 1872 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1873 if (!v) {pvA = NULL; pvB = NULL;} 1874 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1875 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1876 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1877 nztot = nzA + nzB; 1878 1879 cmap = mat->garray; 1880 if (v || idx) { 1881 if (nztot) { 1882 /* Sort by increasing column numbers, assuming A and B already sorted */ 1883 PetscInt imark = -1; 1884 if (v) { 1885 *v = v_p = mat->rowvalues; 1886 for (i=0; i<nzB; i++) { 1887 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1888 else break; 1889 } 1890 imark = i; 1891 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1892 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1893 } 1894 if (idx) { 1895 *idx = idx_p = mat->rowindices; 1896 if (imark > -1) { 1897 for (i=0; i<imark; i++) { 1898 idx_p[i] = cmap[cworkB[i]]; 1899 } 1900 } else { 1901 for (i=0; i<nzB; i++) { 1902 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1903 else break; 1904 } 1905 imark = i; 1906 } 1907 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1908 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1909 } 1910 } else { 1911 if (idx) *idx = NULL; 1912 if (v) *v = NULL; 1913 } 1914 } 1915 *nz = nztot; 1916 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1917 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1922 { 1923 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1924 1925 PetscFunctionBegin; 1926 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1927 aij->getrowactive = PETSC_FALSE; 1928 PetscFunctionReturn(0); 1929 } 1930 1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1932 { 1933 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1934 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1935 PetscErrorCode ierr; 1936 PetscInt i,j,cstart = mat->cmap->rstart; 1937 PetscReal sum = 0.0; 1938 MatScalar *v; 1939 1940 PetscFunctionBegin; 1941 if (aij->size == 1) { 1942 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1943 } else { 1944 if (type == NORM_FROBENIUS) { 1945 v = amat->a; 1946 for (i=0; i<amat->nz; i++) { 1947 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1948 } 1949 v = bmat->a; 1950 for (i=0; i<bmat->nz; i++) { 1951 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1952 } 1953 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1954 *norm = PetscSqrtReal(*norm); 1955 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1956 } else if (type == NORM_1) { /* max column norm */ 1957 PetscReal *tmp,*tmp2; 1958 PetscInt *jj,*garray = aij->garray; 1959 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1960 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1961 *norm = 0.0; 1962 v = amat->a; jj = amat->j; 1963 for (j=0; j<amat->nz; j++) { 1964 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1965 } 1966 v = bmat->a; jj = bmat->j; 1967 for (j=0; j<bmat->nz; j++) { 1968 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1969 } 1970 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1971 for (j=0; j<mat->cmap->N; j++) { 1972 if (tmp2[j] > *norm) *norm = tmp2[j]; 1973 } 1974 ierr = PetscFree(tmp);CHKERRQ(ierr); 1975 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1976 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1977 } else if (type == NORM_INFINITY) { /* max row norm */ 1978 PetscReal ntemp = 0.0; 1979 for (j=0; j<aij->A->rmap->n; j++) { 1980 v = amat->a + amat->i[j]; 1981 sum = 0.0; 1982 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a + bmat->i[j]; 1986 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 if (sum > ntemp) ntemp = sum; 1990 } 1991 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1992 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1993 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1999 { 2000 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2001 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2002 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2003 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2004 PetscErrorCode ierr; 2005 Mat B,A_diag,*B_diag; 2006 const MatScalar *array; 2007 2008 PetscFunctionBegin; 2009 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2010 ai = Aloc->i; aj = Aloc->j; 2011 bi = Bloc->i; bj = Bloc->j; 2012 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2013 PetscInt *d_nnz,*g_nnz,*o_nnz; 2014 PetscSFNode *oloc; 2015 PETSC_UNUSED PetscSF sf; 2016 2017 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2018 /* compute d_nnz for preallocation */ 2019 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2020 for (i=0; i<ai[ma]; i++) { 2021 d_nnz[aj[i]]++; 2022 } 2023 /* compute local off-diagonal contributions */ 2024 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2025 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2026 /* map those to global */ 2027 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2028 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2029 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2030 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2031 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2032 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2033 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2034 2035 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2036 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2037 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2038 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2039 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2040 ierr = 
PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2041 } else { 2042 B = *matout; 2043 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2044 } 2045 2046 b = (Mat_MPIAIJ*)B->data; 2047 A_diag = a->A; 2048 B_diag = &b->A; 2049 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2050 A_diag_ncol = A_diag->cmap->N; 2051 B_diag_ilen = sub_B_diag->ilen; 2052 B_diag_i = sub_B_diag->i; 2053 2054 /* Set ilen for diagonal of B */ 2055 for (i=0; i<A_diag_ncol; i++) { 2056 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2057 } 2058 2059 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2060 very quickly (=without using MatSetValues), because all writes are local. */ 2061 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2062 2063 /* copy over the B part */ 2064 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2065 array = Bloc->a; 2066 row = A->rmap->rstart; 2067 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2068 cols_tmp = cols; 2069 for (i=0; i<mb; i++) { 2070 ncol = bi[i+1]-bi[i]; 2071 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2072 row++; 2073 array += ncol; cols_tmp += ncol; 2074 } 2075 ierr = PetscFree(cols);CHKERRQ(ierr); 2076 2077 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2080 *matout = B; 2081 } else { 2082 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2088 { 2089 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2090 Mat a = aij->A,b = aij->B; 2091 PetscErrorCode ierr; 2092 PetscInt s1,s2,s3; 2093 2094 PetscFunctionBegin; 2095 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2096 if (rr) { 2097 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2098 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2099 /* Overlap communication with computation. 
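         The scatter of rr into aij->lvec is only started here; the left scaling of the
         off-diagonal block and the scaling of the diagonal block proceed while it is in
         flight, and the scatter is completed below before the off-diagonal block is
         right-scaled with the gathered values.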
*/ 2100 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 } 2102 if (ll) { 2103 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2104 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2105 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2106 } 2107 /* scale the diagonal block */ 2108 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2109 2110 if (rr) { 2111 /* Do a scatter end and then right scale the off-diagonal block */ 2112 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2119 { 2120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2129 { 2130 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2131 Mat a,b,c,d; 2132 PetscBool flg; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 a = matA->A; b = matA->B; 2137 c = matB->A; d = matB->B; 2138 2139 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2140 if (flg) { 2141 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2142 } 2143 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2151 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2152 2153 PetscFunctionBegin; 2154 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2155 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2156 /* because of the column compression in the off-processor part of the matrix a->B, 2157 the number of columns in a->B and b->B may be different, hence we cannot call 2158 the MatCopy() directly on the two parts. If need be, we can provide a more 2159 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2160 then copying the submatrices */ 2161 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2162 } else { 2163 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2164 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2165 } 2166 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* 2180 Computes the number of nonzeros per row needed for preallocation when X and Y 2181 have different nonzero structure. 
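   For each row the two column lists, mapped to global indices through xltog and yltog, are
   merged with a two-pointer sweep (both lists are assumed to be sorted), and nnz[i] ends up
   as the size of the union of the two row patterns.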
2182 */ 2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2184 { 2185 PetscInt i,j,k,nzx,nzy; 2186 2187 PetscFunctionBegin; 2188 /* Set the number of nonzeros in the new matrix */ 2189 for (i=0; i<m; i++) { 2190 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2191 nzx = xi[i+1] - xi[i]; 2192 nzy = yi[i+1] - yi[i]; 2193 nnz[i] = 0; 2194 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2195 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2196 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2197 nnz[i]++; 2198 } 2199 for (; k<nzy; k++) nnz[i]++; 2200 } 2201 PetscFunctionReturn(0); 2202 } 2203 2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt m = Y->rmap->N; 2209 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2210 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2211 2212 PetscFunctionBegin; 2213 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2214 PetscFunctionReturn(0); 2215 } 2216 2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2218 { 2219 PetscErrorCode ierr; 2220 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2221 PetscBLASInt bnz,one=1; 2222 Mat_SeqAIJ *x,*y; 2223 2224 PetscFunctionBegin; 2225 if (str == SAME_NONZERO_PATTERN) { 2226 PetscScalar alpha = a; 2227 x = (Mat_SeqAIJ*)xx->A->data; 2228 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2229 y = (Mat_SeqAIJ*)yy->A->data; 2230 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2231 x = (Mat_SeqAIJ*)xx->B->data; 2232 y = (Mat_SeqAIJ*)yy->B->data; 2233 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2234 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2235 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2236 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2237 will be updated */ 2238 #if defined(PETSC_HAVE_DEVICE) 2239 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2240 Y->offloadmask = PETSC_OFFLOAD_CPU; 2241 } 2242 #endif 2243 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2244 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2245 } else { 2246 Mat B; 2247 PetscInt *nnz_d,*nnz_o; 2248 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2249 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2250 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2251 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2252 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2253 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2254 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2255 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2256 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2257 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2258 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2259 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2260 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2261 } 2262 PetscFunctionReturn(0); 2263 } 2264 2265 extern 
PetscErrorCode MatConjugate_SeqAIJ(Mat); 2266 2267 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2268 { 2269 #if defined(PETSC_USE_COMPLEX) 2270 PetscErrorCode ierr; 2271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2272 2273 PetscFunctionBegin; 2274 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2275 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2276 #else 2277 PetscFunctionBegin; 2278 #endif 2279 PetscFunctionReturn(0); 2280 } 2281 2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2283 { 2284 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2285 PetscErrorCode ierr; 2286 2287 PetscFunctionBegin; 2288 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2289 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2294 { 2295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2300 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2305 { 2306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2307 PetscErrorCode ierr; 2308 PetscInt i,*idxb = NULL,m = A->rmap->n; 2309 PetscScalar *va,*vv; 2310 Vec vB,vA; 2311 const PetscScalar *vb; 2312 2313 PetscFunctionBegin; 2314 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2315 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2316 2317 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2318 if (idx) { 2319 for (i=0; i<m; i++) { 2320 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2321 } 2322 } 2323 2324 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2325 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2326 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2327 2328 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2329 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2330 for (i=0; i<m; i++) { 2331 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2332 vv[i] = vb[i]; 2333 if (idx) idx[i] = a->garray[idxb[i]]; 2334 } else { 2335 vv[i] = va[i]; 2336 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2337 idx[i] = a->garray[idxb[i]]; 2338 } 2339 } 2340 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2341 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2342 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2343 ierr = PetscFree(idxb);CHKERRQ(ierr); 2344 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2345 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2346 PetscFunctionReturn(0); 2347 } 2348 2349 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2350 { 2351 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2352 PetscInt m = A->rmap->n,n = A->cmap->n; 2353 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2354 PetscInt *cmap = mat->garray; 2355 PetscInt *diagIdx, *offdiagIdx; 2356 Vec diagV, offdiagV; 2357 PetscScalar *a, *diagA, *offdiagA, *ba; 2358 PetscInt r,j,col,ncols,*bi,*bj; 2359 PetscErrorCode ierr; 2360 Mat B = mat->B; 2361 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2362 2363 PetscFunctionBegin; 2364 /* When a process holds entire A and other processes have no entry */ 2365 if (A->cmap->N == n) { 2366 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2367 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2368 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2369 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2370 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2371 PetscFunctionReturn(0); 2372 } else if (n == 0) 
{ 2373 if (m) { 2374 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2375 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2376 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2377 } 2378 PetscFunctionReturn(0); 2379 } 2380 2381 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2382 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2383 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2384 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2385 2386 /* Get offdiagIdx[] for implicit 0.0 */ 2387 ba = b->a; 2388 bi = b->i; 2389 bj = b->j; 2390 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2391 for (r = 0; r < m; r++) { 2392 ncols = bi[r+1] - bi[r]; 2393 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2394 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2395 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2396 offdiagA[r] = 0.0; 2397 2398 /* Find first hole in the cmap */ 2399 for (j=0; j<ncols; j++) { 2400 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2401 if (col > j && j < cstart) { 2402 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2403 break; 2404 } else if (col > j + n && j >= cstart) { 2405 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2406 break; 2407 } 2408 } 2409 if (j == ncols && ncols < A->cmap->N - n) { 2410 /* a hole is outside compressed Bcols */ 2411 if (ncols == 0) { 2412 if (cstart) { 2413 offdiagIdx[r] = 0; 2414 } else offdiagIdx[r] = cend; 2415 } else { /* ncols > 0 */ 2416 offdiagIdx[r] = cmap[ncols-1] + 1; 2417 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2418 } 2419 } 2420 } 2421 2422 for (j=0; j<ncols; j++) { 2423 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2424 ba++; bj++; 2425 } 2426 } 2427 2428 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2429 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2430 for (r = 0; r < m; ++r) { 2431 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2432 a[r] = diagA[r]; 2433 if (idx) idx[r] = cstart + diagIdx[r]; 2434 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2435 a[r] = diagA[r]; 2436 if (idx) { 2437 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2438 idx[r] = cstart + diagIdx[r]; 2439 } else idx[r] = offdiagIdx[r]; 2440 } 2441 } else { 2442 a[r] = offdiagA[r]; 2443 if (idx) idx[r] = offdiagIdx[r]; 2444 } 2445 } 2446 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2447 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2448 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2449 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2450 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2451 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2456 { 2457 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2458 PetscInt m = A->rmap->n,n = A->cmap->n; 2459 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2460 PetscInt *cmap = mat->garray; 2461 PetscInt *diagIdx, *offdiagIdx; 2462 Vec diagV, offdiagV; 2463 PetscScalar *a, *diagA, *offdiagA, *ba; 2464 PetscInt r,j,col,ncols,*bi,*bj; 2465 PetscErrorCode ierr; 2466 Mat B = mat->B; 2467 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2468 2469 PetscFunctionBegin; 2470 /* When a process holds entire A and other processes have no entry */ 2471 if (A->cmap->N == n) { 2472 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2473 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2474 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2475 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2476 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2477 PetscFunctionReturn(0); 2478 } else if (n == 0) { 2479 if (m) { 2480 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2481 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2482 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2483 } 2484 PetscFunctionReturn(0); 2485 } 2486 2487 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2488 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2489 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2490 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2491 2492 /* Get offdiagIdx[] for implicit 0.0 */ 2493 ba = b->a; 2494 bi = b->i; 2495 bj = b->j; 2496 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2497 for (r = 0; r < m; r++) { 2498 ncols = bi[r+1] - bi[r]; 2499 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2500 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2501 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2502 offdiagA[r] = 0.0; 2503 2504 /* Find first hole in the cmap */ 2505 for (j=0; j<ncols; j++) { 2506 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2507 if (col > j && j < cstart) { 2508 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2509 break; 2510 } else if (col > j + n && j >= cstart) { 2511 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2512 break; 2513 } 2514 } 2515 if (j == ncols && ncols < A->cmap->N - n) { 2516 /* a hole is outside compressed Bcols */ 2517 if (ncols == 0) { 2518 if (cstart) { 2519 offdiagIdx[r] = 0; 2520 } else offdiagIdx[r] = cend; 2521 } else { /* ncols > 0 */ 2522 offdiagIdx[r] = cmap[ncols-1] + 1; 2523 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2524 } 2525 } 2526 } 2527 2528 for (j=0; j<ncols; j++) { 2529 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2530 ba++; bj++; 2531 } 2532 } 2533 2534 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2535 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2536 for (r = 0; r < m; ++r) { 2537 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2538 a[r] = diagA[r]; 2539 if (idx) idx[r] = cstart + diagIdx[r]; 2540 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2541 a[r] = diagA[r]; 2542 if (idx) { 2543 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2544 idx[r] = cstart + diagIdx[r]; 2545 } else idx[r] = offdiagIdx[r]; 2546 } 2547 } else { 2548 a[r] = offdiagA[r]; 2549 if (idx) idx[r] = offdiagIdx[r]; 2550 } 2551 } 2552 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2553 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2554 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2555 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2556 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2557 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2562 { 2563 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2564 PetscInt m = A->rmap->n,n = A->cmap->n; 2565 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2566 PetscInt *cmap = mat->garray; 2567 PetscInt *diagIdx, *offdiagIdx; 2568 Vec diagV, 
offdiagV; 2569 PetscScalar *a, *diagA, *offdiagA, *ba; 2570 PetscInt r,j,col,ncols,*bi,*bj; 2571 PetscErrorCode ierr; 2572 Mat B = mat->B; 2573 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2574 2575 PetscFunctionBegin; 2576 /* When a process holds entire A and other processes have no entry */ 2577 if (A->cmap->N == n) { 2578 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2579 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2580 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2581 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2582 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2583 PetscFunctionReturn(0); 2584 } else if (n == 0) { 2585 if (m) { 2586 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2587 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2588 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2589 } 2590 PetscFunctionReturn(0); 2591 } 2592 2593 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2594 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2595 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2596 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2597 2598 /* Get offdiagIdx[] for implicit 0.0 */ 2599 ba = b->a; 2600 bi = b->i; 2601 bj = b->j; 2602 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2603 for (r = 0; r < m; r++) { 2604 ncols = bi[r+1] - bi[r]; 2605 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2606 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2607 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2608 offdiagA[r] = 0.0; 2609 2610 /* Find first hole in the cmap */ 2611 for (j=0; j<ncols; j++) { 2612 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2613 if (col > j && j < cstart) { 2614 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2615 break; 2616 } else if (col > j + n && j >= cstart) { 2617 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2618 break; 2619 } 2620 } 2621 if (j == ncols && ncols < A->cmap->N - n) { 2622 /* a hole is outside compressed Bcols */ 2623 if (ncols == 0) { 2624 if (cstart) { 2625 offdiagIdx[r] = 0; 2626 } else offdiagIdx[r] = cend; 2627 } else { /* ncols > 0 */ 2628 offdiagIdx[r] = cmap[ncols-1] + 1; 2629 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2630 } 2631 } 2632 } 2633 2634 for (j=0; j<ncols; j++) { 2635 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2636 ba++; bj++; 2637 } 2638 } 2639 2640 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2641 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2642 for (r = 0; r < m; ++r) { 2643 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2644 a[r] = diagA[r]; 2645 if (idx) idx[r] = cstart + diagIdx[r]; 2646 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2647 a[r] = diagA[r]; 2648 if (idx) { 2649 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2650 idx[r] = cstart + diagIdx[r]; 2651 } else idx[r] = offdiagIdx[r]; 2652 } 2653 } else { 2654 a[r] = offdiagA[r]; 2655 if (idx) idx[r] = offdiagIdx[r]; 2656 } 2657 } 2658 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2659 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2660 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2661 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2662 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2663 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2664 PetscFunctionReturn(0); 
2665 } 2666 2667 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2668 { 2669 PetscErrorCode ierr; 2670 Mat *dummy; 2671 2672 PetscFunctionBegin; 2673 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2674 *newmat = *dummy; 2675 ierr = PetscFree(dummy);CHKERRQ(ierr); 2676 PetscFunctionReturn(0); 2677 } 2678 2679 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2680 { 2681 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2682 PetscErrorCode ierr; 2683 2684 PetscFunctionBegin; 2685 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2686 A->factorerrortype = a->A->factorerrortype; 2687 PetscFunctionReturn(0); 2688 } 2689 2690 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2691 { 2692 PetscErrorCode ierr; 2693 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2694 2695 PetscFunctionBegin; 2696 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2697 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2698 if (x->assembled) { 2699 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2700 } else { 2701 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2702 } 2703 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2704 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2705 PetscFunctionReturn(0); 2706 } 2707 2708 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2709 { 2710 PetscFunctionBegin; 2711 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2712 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2713 PetscFunctionReturn(0); 2714 } 2715 2716 /*@ 2717 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2718 2719 Collective on Mat 2720 2721 Input Parameters: 2722 + A - the matrix 2723 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2724 2725 Level: advanced 2726 2727 @*/ 2728 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2729 { 2730 PetscErrorCode ierr; 2731 2732 PetscFunctionBegin; 2733 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 2737 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2738 { 2739 PetscErrorCode ierr; 2740 PetscBool sc = PETSC_FALSE,flg; 2741 2742 PetscFunctionBegin; 2743 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2744 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2745 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2746 if (flg) { 2747 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2748 } 2749 ierr = PetscOptionsTail();CHKERRQ(ierr); 2750 PetscFunctionReturn(0); 2751 } 2752 2753 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2754 { 2755 PetscErrorCode ierr; 2756 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2757 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2758 2759 PetscFunctionBegin; 2760 if (!Y->preallocated) { 2761 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2762 } else if (!aij->nz) { 2763 PetscInt nonew = 
aij->nonew; 2764 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2765 aij->nonew = nonew; 2766 } 2767 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2768 PetscFunctionReturn(0); 2769 } 2770 2771 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2772 { 2773 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2774 PetscErrorCode ierr; 2775 2776 PetscFunctionBegin; 2777 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2778 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2779 if (d) { 2780 PetscInt rstart; 2781 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2782 *d += rstart; 2783 2784 } 2785 PetscFunctionReturn(0); 2786 } 2787 2788 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2789 { 2790 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2791 PetscErrorCode ierr; 2792 2793 PetscFunctionBegin; 2794 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2795 PetscFunctionReturn(0); 2796 } 2797 2798 /* -------------------------------------------------------------------*/ 2799 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2800 MatGetRow_MPIAIJ, 2801 MatRestoreRow_MPIAIJ, 2802 MatMult_MPIAIJ, 2803 /* 4*/ MatMultAdd_MPIAIJ, 2804 MatMultTranspose_MPIAIJ, 2805 MatMultTransposeAdd_MPIAIJ, 2806 NULL, 2807 NULL, 2808 NULL, 2809 /*10*/ NULL, 2810 NULL, 2811 NULL, 2812 MatSOR_MPIAIJ, 2813 MatTranspose_MPIAIJ, 2814 /*15*/ MatGetInfo_MPIAIJ, 2815 MatEqual_MPIAIJ, 2816 MatGetDiagonal_MPIAIJ, 2817 MatDiagonalScale_MPIAIJ, 2818 MatNorm_MPIAIJ, 2819 /*20*/ MatAssemblyBegin_MPIAIJ, 2820 MatAssemblyEnd_MPIAIJ, 2821 MatSetOption_MPIAIJ, 2822 MatZeroEntries_MPIAIJ, 2823 /*24*/ MatZeroRows_MPIAIJ, 2824 NULL, 2825 NULL, 2826 NULL, 2827 NULL, 2828 /*29*/ MatSetUp_MPIAIJ, 2829 NULL, 2830 NULL, 2831 MatGetDiagonalBlock_MPIAIJ, 2832 NULL, 2833 /*34*/ MatDuplicate_MPIAIJ, 2834 NULL, 2835 NULL, 2836 NULL, 2837 NULL, 2838 /*39*/ MatAXPY_MPIAIJ, 2839 MatCreateSubMatrices_MPIAIJ, 2840 MatIncreaseOverlap_MPIAIJ, 2841 MatGetValues_MPIAIJ, 2842 MatCopy_MPIAIJ, 2843 /*44*/ MatGetRowMax_MPIAIJ, 2844 MatScale_MPIAIJ, 2845 MatShift_MPIAIJ, 2846 MatDiagonalSet_MPIAIJ, 2847 MatZeroRowsColumns_MPIAIJ, 2848 /*49*/ MatSetRandom_MPIAIJ, 2849 NULL, 2850 NULL, 2851 NULL, 2852 NULL, 2853 /*54*/ MatFDColoringCreate_MPIXAIJ, 2854 NULL, 2855 MatSetUnfactored_MPIAIJ, 2856 MatPermute_MPIAIJ, 2857 NULL, 2858 /*59*/ MatCreateSubMatrix_MPIAIJ, 2859 MatDestroy_MPIAIJ, 2860 MatView_MPIAIJ, 2861 NULL, 2862 NULL, 2863 /*64*/ NULL, 2864 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2865 NULL, 2866 NULL, 2867 NULL, 2868 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2869 MatGetRowMinAbs_MPIAIJ, 2870 NULL, 2871 NULL, 2872 NULL, 2873 NULL, 2874 /*75*/ MatFDColoringApply_AIJ, 2875 MatSetFromOptions_MPIAIJ, 2876 NULL, 2877 NULL, 2878 MatFindZeroDiagonals_MPIAIJ, 2879 /*80*/ NULL, 2880 NULL, 2881 NULL, 2882 /*83*/ MatLoad_MPIAIJ, 2883 MatIsSymmetric_MPIAIJ, 2884 NULL, 2885 NULL, 2886 NULL, 2887 NULL, 2888 /*89*/ NULL, 2889 NULL, 2890 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2891 NULL, 2892 NULL, 2893 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2894 NULL, 2895 NULL, 2896 NULL, 2897 MatBindToCPU_MPIAIJ, 2898 /*99*/ MatProductSetFromOptions_MPIAIJ, 2899 NULL, 2900 NULL, 2901 MatConjugate_MPIAIJ, 2902 NULL, 2903 /*104*/MatSetValuesRow_MPIAIJ, 2904 MatRealPart_MPIAIJ, 2905 MatImaginaryPart_MPIAIJ, 2906 NULL, 2907 NULL, 2908 /*109*/NULL, 2909 NULL, 2910 MatGetRowMin_MPIAIJ, 2911 NULL, 2912 
MatMissingDiagonal_MPIAIJ, 2913 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2914 NULL, 2915 MatGetGhosts_MPIAIJ, 2916 NULL, 2917 NULL, 2918 /*119*/NULL, 2919 NULL, 2920 NULL, 2921 NULL, 2922 MatGetMultiProcBlock_MPIAIJ, 2923 /*124*/MatFindNonzeroRows_MPIAIJ, 2924 MatGetColumnNorms_MPIAIJ, 2925 MatInvertBlockDiagonal_MPIAIJ, 2926 MatInvertVariableBlockDiagonal_MPIAIJ, 2927 MatCreateSubMatricesMPI_MPIAIJ, 2928 /*129*/NULL, 2929 NULL, 2930 NULL, 2931 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2932 NULL, 2933 /*134*/NULL, 2934 NULL, 2935 NULL, 2936 NULL, 2937 NULL, 2938 /*139*/MatSetBlockSizes_MPIAIJ, 2939 NULL, 2940 NULL, 2941 MatFDColoringSetUp_MPIXAIJ, 2942 MatFindOffBlockDiagonalEntries_MPIAIJ, 2943 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2944 /*145*/NULL, 2945 NULL, 2946 NULL 2947 }; 2948 2949 /* ----------------------------------------------------------------------------------------*/ 2950 2951 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2952 { 2953 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2954 PetscErrorCode ierr; 2955 2956 PetscFunctionBegin; 2957 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2958 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2959 PetscFunctionReturn(0); 2960 } 2961 2962 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2963 { 2964 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2965 PetscErrorCode ierr; 2966 2967 PetscFunctionBegin; 2968 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2969 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 2973 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2974 { 2975 Mat_MPIAIJ *b; 2976 PetscErrorCode ierr; 2977 PetscMPIInt size; 2978 2979 PetscFunctionBegin; 2980 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2981 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2982 b = (Mat_MPIAIJ*)B->data; 2983 2984 #if defined(PETSC_USE_CTABLE) 2985 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2986 #else 2987 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2988 #endif 2989 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2990 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2991 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2992 2993 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2994 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2995 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2996 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2997 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2998 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2999 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3000 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3001 3002 if (!B->preallocated) { 3003 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3004 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3005 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3006 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3007 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3008 } 3009 3010 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3011 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3012 B->preallocated = PETSC_TRUE; 3013 B->was_assembled = PETSC_FALSE; 3014 B->assembled = PETSC_FALSE; 3015 PetscFunctionReturn(0); 3016 } 3017 3018 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 3019 { 3020 Mat_MPIAIJ *b; 3021 PetscErrorCode ierr; 3022 3023 PetscFunctionBegin; 3024 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3025 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3026 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3027 b = (Mat_MPIAIJ*)B->data; 3028 3029 #if defined(PETSC_USE_CTABLE) 3030 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 3031 #else 3032 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 3033 #endif 3034 ierr = PetscFree(b->garray);CHKERRQ(ierr); 3035 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 3036 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 3037 3038 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 3039 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 3040 B->preallocated = PETSC_TRUE; 3041 B->was_assembled = PETSC_FALSE; 3042 B->assembled = PETSC_FALSE; 3043 PetscFunctionReturn(0); 3044 } 3045 3046 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3047 { 3048 Mat mat; 3049 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3050 PetscErrorCode ierr; 3051 3052 PetscFunctionBegin; 3053 *newmat = NULL; 3054 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3055 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3056 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3057 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3058 a = (Mat_MPIAIJ*)mat->data; 3059 3060 mat->factortype = matin->factortype; 3061 mat->assembled = matin->assembled; 3062 mat->insertmode = NOT_SET_VALUES; 3063 mat->preallocated = matin->preallocated; 3064 3065 a->size = oldmat->size; 3066 a->rank = oldmat->rank; 3067 a->donotstash = oldmat->donotstash; 3068 a->roworiented = oldmat->roworiented; 3069 a->rowindices = NULL; 3070 a->rowvalues = NULL; 3071 a->getrowactive = PETSC_FALSE; 3072 3073 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3074 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3075 3076 if (oldmat->colmap) { 3077 #if defined(PETSC_USE_CTABLE) 3078 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3079 #else 3080 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 3081 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3082 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 3083 #endif 3084 } else a->colmap = NULL; 3085 if (oldmat->garray) { 3086 PetscInt len; 3087 len = oldmat->B->cmap->n; 3088 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 3089 
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3090 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 3091 } else a->garray = NULL; 3092 3093 /* It may happen MatDuplicate is called with a non-assembled matrix 3094 In fact, MatDuplicate only requires the matrix to be preallocated 3095 This may happen inside a DMCreateMatrix_Shell */ 3096 if (oldmat->lvec) { 3097 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3098 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3099 } 3100 if (oldmat->Mvctx) { 3101 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3102 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3103 } 3104 if (oldmat->Mvctx_mpi1) { 3105 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 3106 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 3107 } 3108 3109 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3110 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3111 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3112 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3113 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3114 *newmat = mat; 3115 PetscFunctionReturn(0); 3116 } 3117 3118 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3119 { 3120 PetscBool isbinary, ishdf5; 3121 PetscErrorCode ierr; 3122 3123 PetscFunctionBegin; 3124 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3125 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3126 /* force binary viewer to load .info file if it has not yet done so */ 3127 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3128 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3129 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3130 if (isbinary) { 3131 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3132 } else if (ishdf5) { 3133 #if defined(PETSC_HAVE_HDF5) 3134 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3135 #else 3136 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3137 #endif 3138 } else { 3139 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3140 } 3141 PetscFunctionReturn(0); 3142 } 3143 3144 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3145 { 3146 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3147 PetscInt *rowidxs,*colidxs; 3148 PetscScalar *matvals; 3149 PetscErrorCode ierr; 3150 3151 PetscFunctionBegin; 3152 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3153 3154 /* read in matrix header */ 3155 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3156 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3157 M = header[1]; N = header[2]; nz = header[3]; 3158 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3159 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 3160 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3161 3162 /* set block sizes from the viewer's .info file */ 3163 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3164 /* set global sizes if not set already */ 3165 if (mat->rmap->N < 0) mat->rmap->N = M; 3166 if (mat->cmap->N < 0) mat->cmap->N = N; 3167 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3168 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3169 3170 /* check if the matrix sizes are correct */ 3171 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3172 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3173 3174 /* read in row lengths and build row indices */ 3175 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3176 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3177 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3178 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3179 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3180 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3181 /* read in column indices and matrix values */ 3182 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3183 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3184 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3185 /* store matrix indices and values */ 3186 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3187 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3188 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3189 PetscFunctionReturn(0); 3190 } 3191 3192 /* Not scalable because of ISAllGather() unless getting all columns. 
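   ISAllGather() concatenates the locally owned pieces of iscol onto every process, so each process ends
   up holding an index set whose length is the global number of selected columns.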
*/ 3193 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3194 { 3195 PetscErrorCode ierr; 3196 IS iscol_local; 3197 PetscBool isstride; 3198 PetscMPIInt lisstride=0,gisstride; 3199 3200 PetscFunctionBegin; 3201 /* check if we are grabbing all columns*/ 3202 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3203 3204 if (isstride) { 3205 PetscInt start,len,mstart,mlen; 3206 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3207 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3208 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3209 if (mstart == start && mlen-mstart == len) lisstride = 1; 3210 } 3211 3212 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3213 if (gisstride) { 3214 PetscInt N; 3215 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3216 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3217 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3218 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3219 } else { 3220 PetscInt cbs; 3221 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3222 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3223 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3224 } 3225 3226 *isseq = iscol_local; 3227 PetscFunctionReturn(0); 3228 } 3229 3230 /* 3231 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3232 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3233 3234 Input Parameters: 3235 mat - matrix 3236 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3237 i.e., mat->rstart <= isrow[i] < mat->rend 3238 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3239 i.e., mat->cstart <= iscol[i] < mat->cend 3240 Output Parameter: 3241 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3242 iscol_o - sequential column index set for retrieving mat->B 3243 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3244 */ 3245 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3246 { 3247 PetscErrorCode ierr; 3248 Vec x,cmap; 3249 const PetscInt *is_idx; 3250 PetscScalar *xarray,*cmaparray; 3251 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3252 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3253 Mat B=a->B; 3254 Vec lvec=a->lvec,lcmap; 3255 PetscInt i,cstart,cend,Bn=B->cmap->N; 3256 MPI_Comm comm; 3257 VecScatter Mvctx=a->Mvctx; 3258 3259 PetscFunctionBegin; 3260 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3261 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3262 3263 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3264 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3265 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3266 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3267 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3268 3269 /* Get start indices */ 3270 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3271 isstart -= ncols; 3272 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3273 3274 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3275 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3276 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3277 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3278 for (i=0; i<ncols; i++) { 3279 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3280 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3281 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3282 } 3283 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3284 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3285 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3286 3287 /* Get iscol_d */ 3288 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3289 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3290 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3291 3292 /* Get isrow_d */ 3293 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3294 rstart = mat->rmap->rstart; 3295 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3296 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3297 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3298 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3299 3300 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3301 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3302 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3303 3304 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3305 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3306 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3307 3308 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3309 3310 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3311 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3312 3313 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3314 /* off-process column indices */ 3315 count = 0; 3316 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3317 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3318 3319 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3320 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3321 for (i=0; i<Bn; i++) { 3322 if (PetscRealPart(xarray[i]) > -1.0) { 3323 idx[count] = i; /* local column index in off-diagonal part B */ 3324 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3325 count++; 3326 } 3327 } 3328 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3329 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3330 3331 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3332 /* cannot ensure iscol_o has same blocksize as iscol! 
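      (iscol_o only lists the off-diagonal columns actually coupled to this process, and that subset need
      not fall on block boundaries)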
*/ 3333 3334 ierr = PetscFree(idx);CHKERRQ(ierr); 3335 *garray = cmap1; 3336 3337 ierr = VecDestroy(&x);CHKERRQ(ierr); 3338 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3339 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3340 PetscFunctionReturn(0); 3341 } 3342 3343 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3344 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3345 { 3346 PetscErrorCode ierr; 3347 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3348 Mat M = NULL; 3349 MPI_Comm comm; 3350 IS iscol_d,isrow_d,iscol_o; 3351 Mat Asub = NULL,Bsub = NULL; 3352 PetscInt n; 3353 3354 PetscFunctionBegin; 3355 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3356 3357 if (call == MAT_REUSE_MATRIX) { 3358 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3359 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3360 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3361 3362 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3363 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3364 3365 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3366 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3367 3368 /* Update diagonal and off-diagonal portions of submat */ 3369 asub = (Mat_MPIAIJ*)(*submat)->data; 3370 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3371 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3372 if (n) { 3373 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3374 } 3375 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3376 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3377 3378 } else { /* call == MAT_INITIAL_MATRIX) */ 3379 const PetscInt *garray; 3380 PetscInt BsubN; 3381 3382 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
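       garray returned here is allocated inside ISGetSeqIS_SameColDist_Private() and is freed with
       PetscFree() below once the submatrix has been built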
*/ 3383 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3384 3385 /* Create local submatrices Asub and Bsub */ 3386 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3387 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3388 3389 /* Create submatrix M */ 3390 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3391 3392 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3393 asub = (Mat_MPIAIJ*)M->data; 3394 3395 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3396 n = asub->B->cmap->N; 3397 if (BsubN > n) { 3398 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3399 const PetscInt *idx; 3400 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3401 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3402 3403 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3404 j = 0; 3405 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3406 for (i=0; i<n; i++) { 3407 if (j >= BsubN) break; 3408 while (subgarray[i] > garray[j]) j++; 3409 3410 if (subgarray[i] == garray[j]) { 3411 idx_new[i] = idx[j++]; 3412 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3413 } 3414 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3415 3416 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3417 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3418 3419 } else if (BsubN < n) { 3420 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3421 } 3422 3423 ierr = PetscFree(garray);CHKERRQ(ierr); 3424 *submat = M; 3425 3426 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3427 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3428 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3429 3430 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3431 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3432 3433 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3434 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3435 } 3436 PetscFunctionReturn(0); 3437 } 3438 3439 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3440 { 3441 PetscErrorCode ierr; 3442 IS iscol_local=NULL,isrow_d; 3443 PetscInt csize; 3444 PetscInt n,i,j,start,end; 3445 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3446 MPI_Comm comm; 3447 3448 PetscFunctionBegin; 3449 /* If isrow has same processor distribution as mat, 3450 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3451 if (call == MAT_REUSE_MATRIX) { 3452 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3453 if (isrow_d) { 3454 sameRowDist = PETSC_TRUE; 3455 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3456 } else { 3457 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3458 if (iscol_local) { 3459 sameRowDist = PETSC_TRUE; 3460 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3461 } 3462 } 3463 } else { 3464 /* Check if isrow has same processor distribution as mat */ 3465 sameDist[0] = 
PETSC_FALSE; 3466 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3467 if (!n) { 3468 sameDist[0] = PETSC_TRUE; 3469 } else { 3470 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3471 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3472 if (i >= start && j < end) { 3473 sameDist[0] = PETSC_TRUE; 3474 } 3475 } 3476 3477 /* Check if iscol has same processor distribution as mat */ 3478 sameDist[1] = PETSC_FALSE; 3479 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3480 if (!n) { 3481 sameDist[1] = PETSC_TRUE; 3482 } else { 3483 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3484 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3485 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3486 } 3487 3488 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3489 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3490 sameRowDist = tsameDist[0]; 3491 } 3492 3493 if (sameRowDist) { 3494 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3495 /* isrow and iscol have same processor distribution as mat */ 3496 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3497 PetscFunctionReturn(0); 3498 } else { /* sameRowDist */ 3499 /* isrow has same processor distribution as mat */ 3500 if (call == MAT_INITIAL_MATRIX) { 3501 PetscBool sorted; 3502 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3503 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3504 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3505 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3506 3507 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3508 if (sorted) { 3509 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3510 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3511 PetscFunctionReturn(0); 3512 } 3513 } else { /* call == MAT_REUSE_MATRIX */ 3514 IS iscol_sub; 3515 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3516 if (iscol_sub) { 3517 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3518 PetscFunctionReturn(0); 3519 } 3520 } 3521 } 3522 } 3523 3524 /* General case: iscol -> iscol_local which has global size of iscol */ 3525 if (call == MAT_REUSE_MATRIX) { 3526 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3527 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3528 } else { 3529 if (!iscol_local) { 3530 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3531 } 3532 } 3533 3534 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3535 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3536 3537 if (call == MAT_INITIAL_MATRIX) { 3538 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3539 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3540 } 3541 PetscFunctionReturn(0); 3542 } 3543 3544 /*@C 3545 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3546 and "off-diagonal" part of the matrix in CSR format. 3547 3548 Collective 3549 3550 Input Parameters: 3551 + comm - MPI communicator 3552 . 
A - "diagonal" portion of matrix 3553 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3554 - garray - global index of B columns 3555 3556 Output Parameter: 3557 . mat - the matrix, with input A as its local diagonal matrix 3558 Level: advanced 3559 3560 Notes: 3561 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3562 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3563 3564 .seealso: MatCreateMPIAIJWithSplitArrays() 3565 @*/ 3566 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3567 { 3568 PetscErrorCode ierr; 3569 Mat_MPIAIJ *maij; 3570 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3571 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3572 PetscScalar *oa=b->a; 3573 Mat Bnew; 3574 PetscInt m,n,N; 3575 3576 PetscFunctionBegin; 3577 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3578 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3579 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3580 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3581 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3582 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3583 3584 /* Get global columns of mat */ 3585 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3586 3587 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3588 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3589 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3590 maij = (Mat_MPIAIJ*)(*mat)->data; 3591 3592 (*mat)->preallocated = PETSC_TRUE; 3593 3594 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3595 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3596 3597 /* Set A as diagonal portion of *mat */ 3598 maij->A = A; 3599 3600 nz = oi[m]; 3601 for (i=0; i<nz; i++) { 3602 col = oj[i]; 3603 oj[i] = garray[col]; 3604 } 3605 3606 /* Set Bnew as off-diagonal portion of *mat */ 3607 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3608 bnew = (Mat_SeqAIJ*)Bnew->data; 3609 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3610 maij->B = Bnew; 3611 3612 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3613 3614 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3615 b->free_a = PETSC_FALSE; 3616 b->free_ij = PETSC_FALSE; 3617 ierr = MatDestroy(&B);CHKERRQ(ierr); 3618 3619 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3620 bnew->free_a = PETSC_TRUE; 3621 bnew->free_ij = PETSC_TRUE; 3622 3623 /* condense columns of maij->B */ 3624 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3625 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3626 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3627 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3628 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3629 PetscFunctionReturn(0); 3630 } 3631 3632 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3633 
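/*
   A minimal sketch of how MatCreateMPIAIJWithSeqAIJ() above can be driven; Aloc, Bloc, g and C are
   illustrative names, not identifiers from this file.  Aloc and Bloc are SeqAIJ matrices that already
   hold this rank's diagonal block and column-compressed off-diagonal block, and g[] maps the columns
   of Bloc to global column numbers:

     Mat      Aloc,Bloc,C;
     PetscInt *g;        (global column numbers of Bloc's columns, allocated with PetscMalloc1())
     ...                 (build Aloc, Bloc and g, for example as ISGetSeqIS_SameColDist_Private() does above)
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,g,&C);CHKERRQ(ierr);
     ierr = PetscFree(g);CHKERRQ(ierr);

   After the call Aloc is the diagonal block of C and Bloc has been destroyed, so neither may be used
   again; g is not taken over by the routine and is freed by the caller, exactly as
   MatCreateSubMatrix_MPIAIJ_SameRowColDist() does above.
*/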
3634 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3635 { 3636 PetscErrorCode ierr; 3637 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3638 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3639 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3640 Mat M,Msub,B=a->B; 3641 MatScalar *aa; 3642 Mat_SeqAIJ *aij; 3643 PetscInt *garray = a->garray,*colsub,Ncols; 3644 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3645 IS iscol_sub,iscmap; 3646 const PetscInt *is_idx,*cmap; 3647 PetscBool allcolumns=PETSC_FALSE; 3648 MPI_Comm comm; 3649 3650 PetscFunctionBegin; 3651 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3652 3653 if (call == MAT_REUSE_MATRIX) { 3654 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3655 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3656 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3657 3658 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3659 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3660 3661 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3662 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3663 3664 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3665 3666 } else { /* call == MAT_INITIAL_MATRIX) */ 3667 PetscBool flg; 3668 3669 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3670 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3671 3672 /* (1) iscol -> nonscalable iscol_local */ 3673 /* Check for special case: each processor gets entire matrix columns */ 3674 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3675 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3676 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3677 if (allcolumns) { 3678 iscol_sub = iscol_local; 3679 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3680 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3681 3682 } else { 3683 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3684 PetscInt *idx,*cmap1,k; 3685 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3686 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3687 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3688 count = 0; 3689 k = 0; 3690 for (i=0; i<Ncols; i++) { 3691 j = is_idx[i]; 3692 if (j >= cstart && j < cend) { 3693 /* diagonal part of mat */ 3694 idx[count] = j; 3695 cmap1[count++] = i; /* column index in submat */ 3696 } else if (Bn) { 3697 /* off-diagonal part of mat */ 3698 if (j == garray[k]) { 3699 idx[count] = j; 3700 cmap1[count++] = i; /* column index in submat */ 3701 } else if (j > garray[k]) { 3702 while (j > garray[k] && k < Bn-1) k++; 3703 if (j == garray[k]) { 3704 idx[count] = j; 3705 cmap1[count++] = i; /* column index in submat */ 3706 } 3707 } 3708 } 3709 } 3710 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3711 3712 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3713 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3714 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3715 3716 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3717 } 3718 3719 /* (3) Create sequential Msub */ 3720 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3721 } 3722 3723 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3724 aij = (Mat_SeqAIJ*)(Msub)->data; 3725 ii = aij->i; 3726 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3727 3728 /* 3729 m - number of local rows 3730 Ncols - number of columns (same on all processors) 3731 rstart - first row in new global matrix generated 3732 */ 3733 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3734 3735 if (call == MAT_INITIAL_MATRIX) { 3736 /* (4) Create parallel newmat */ 3737 PetscMPIInt rank,size; 3738 PetscInt csize; 3739 3740 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3741 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3742 3743 /* 3744 Determine the number of non-zeros in the diagonal and off-diagonal 3745 portions of the matrix in order to do correct preallocation 3746 */ 3747 3748 /* first get start and end of "diagonal" columns */ 3749 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3750 if (csize == PETSC_DECIDE) { 3751 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3752 if (mglobal == Ncols) { /* square matrix */ 3753 nlocal = m; 3754 } else { 3755 nlocal = Ncols/size + ((Ncols % size) > rank); 3756 } 3757 } else { 3758 nlocal = csize; 3759 } 3760 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3761 rstart = rend - nlocal; 3762 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3763 3764 /* next, compute all the lengths */ 3765 jj = aij->j; 3766 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3767 olens = dlens + m; 3768 for (i=0; i<m; i++) { 3769 jend = ii[i+1] - ii[i]; 3770 olen = 0; 3771 dlen = 0; 3772 for (j=0; j<jend; j++) { 3773 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3774 else dlen++; 3775 jj++; 3776 } 3777 olens[i] = olen; 3778 dlens[i] = dlen; 3779 } 3780 3781 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3782 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3783 3784 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3785 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
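    /* the new parallel matrix keeps the type of mat and is preallocated below with the dlens/olens row lengths computed above */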
3786 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3787 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3788 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3789 ierr = PetscFree(dlens);CHKERRQ(ierr); 3790 3791 } else { /* call == MAT_REUSE_MATRIX */ 3792 M = *newmat; 3793 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3794 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3795 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3796 /* 3797 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3798 rather than the slower MatSetValues(). 3799 */ 3800 M->was_assembled = PETSC_TRUE; 3801 M->assembled = PETSC_FALSE; 3802 } 3803 3804 /* (5) Set values of Msub to *newmat */ 3805 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3806 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3807 3808 jj = aij->j; 3809 aa = aij->a; 3810 for (i=0; i<m; i++) { 3811 row = rstart + i; 3812 nz = ii[i+1] - ii[i]; 3813 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3814 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3815 jj += nz; aa += nz; 3816 } 3817 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3818 3819 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3820 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3821 3822 ierr = PetscFree(colsub);CHKERRQ(ierr); 3823 3824 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3825 if (call == MAT_INITIAL_MATRIX) { 3826 *newmat = M; 3827 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3828 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3829 3830 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3831 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3832 3833 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3834 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3835 3836 if (iscol_local) { 3837 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3838 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3839 } 3840 } 3841 PetscFunctionReturn(0); 3842 } 3843 3844 /* 3845 Not great since it makes two copies of the submatrix, first an SeqAIJ 3846 in local and then by concatenating the local matrices the end result. 3847 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3848 3849 Note: This requires a sequential iscol with all indices. 
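   Because every process must hold that full iscol, the per-process memory grows with the global number
   of selected columns, which is what makes this path nonscalable.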
3850 */ 3851 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3852 { 3853 PetscErrorCode ierr; 3854 PetscMPIInt rank,size; 3855 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3856 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3857 Mat M,Mreuse; 3858 MatScalar *aa,*vwork; 3859 MPI_Comm comm; 3860 Mat_SeqAIJ *aij; 3861 PetscBool colflag,allcolumns=PETSC_FALSE; 3862 3863 PetscFunctionBegin; 3864 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3865 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3866 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3867 3868 /* Check for special case: each processor gets entire matrix columns */ 3869 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3870 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3871 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3872 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3873 3874 if (call == MAT_REUSE_MATRIX) { 3875 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3876 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3877 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3878 } else { 3879 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3880 } 3881 3882 /* 3883 m - number of local rows 3884 n - number of columns (same on all processors) 3885 rstart - first row in new global matrix generated 3886 */ 3887 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3888 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3889 if (call == MAT_INITIAL_MATRIX) { 3890 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3891 ii = aij->i; 3892 jj = aij->j; 3893 3894 /* 3895 Determine the number of non-zeros in the diagonal and off-diagonal 3896 portions of the matrix in order to do correct preallocation 3897 */ 3898 3899 /* first get start and end of "diagonal" columns */ 3900 if (csize == PETSC_DECIDE) { 3901 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3902 if (mglobal == n) { /* square matrix */ 3903 nlocal = m; 3904 } else { 3905 nlocal = n/size + ((n % size) > rank); 3906 } 3907 } else { 3908 nlocal = csize; 3909 } 3910 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3911 rstart = rend - nlocal; 3912 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3913 3914 /* next, compute all the lengths */ 3915 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3916 olens = dlens + m; 3917 for (i=0; i<m; i++) { 3918 jend = ii[i+1] - ii[i]; 3919 olen = 0; 3920 dlen = 0; 3921 for (j=0; j<jend; j++) { 3922 if (*jj < rstart || *jj >= rend) olen++; 3923 else dlen++; 3924 jj++; 3925 } 3926 olens[i] = olen; 3927 dlens[i] = dlen; 3928 } 3929 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3930 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3931 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3932 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3933 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3934 ierr = PetscFree(dlens);CHKERRQ(ierr); 3935 } else { 3936 PetscInt ml,nl; 3937 3938 M = *newmat; 3939 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3940 if (ml 
!= m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3941 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3942 /* 3943 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3944 rather than the slower MatSetValues(). 3945 */ 3946 M->was_assembled = PETSC_TRUE; 3947 M->assembled = PETSC_FALSE; 3948 } 3949 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3950 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3951 ii = aij->i; 3952 jj = aij->j; 3953 aa = aij->a; 3954 for (i=0; i<m; i++) { 3955 row = rstart + i; 3956 nz = ii[i+1] - ii[i]; 3957 cwork = jj; jj += nz; 3958 vwork = aa; aa += nz; 3959 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3960 } 3961 3962 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3963 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3964 *newmat = M; 3965 3966 /* save submatrix used in processor for next request */ 3967 if (call == MAT_INITIAL_MATRIX) { 3968 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3969 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3970 } 3971 PetscFunctionReturn(0); 3972 } 3973 3974 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3975 { 3976 PetscInt m,cstart, cend,j,nnz,i,d; 3977 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3978 const PetscInt *JJ; 3979 PetscErrorCode ierr; 3980 PetscBool nooffprocentries; 3981 3982 PetscFunctionBegin; 3983 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3984 3985 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3986 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3987 m = B->rmap->n; 3988 cstart = B->cmap->rstart; 3989 cend = B->cmap->rend; 3990 rstart = B->rmap->rstart; 3991 3992 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3993 3994 if (PetscDefined(USE_DEBUG)) { 3995 for (i=0; i<m; i++) { 3996 nnz = Ii[i+1]- Ii[i]; 3997 JJ = J + Ii[i]; 3998 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3999 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 4000 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 4001 } 4002 } 4003 4004 for (i=0; i<m; i++) { 4005 nnz = Ii[i+1]- Ii[i]; 4006 JJ = J + Ii[i]; 4007 nnz_max = PetscMax(nnz_max,nnz); 4008 d = 0; 4009 for (j=0; j<nnz; j++) { 4010 if (cstart <= JJ[j] && JJ[j] < cend) d++; 4011 } 4012 d_nnz[i] = d; 4013 o_nnz[i] = nnz - d; 4014 } 4015 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 4016 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 4017 4018 for (i=0; i<m; i++) { 4019 ii = i + rstart; 4020 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 4021 } 4022 nooffprocentries = B->nooffprocentries; 4023 B->nooffprocentries = PETSC_TRUE; 4024 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4025 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4026 B->nooffprocentries = nooffprocentries; 4027 4028 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4029 PetscFunctionReturn(0); 4030 } 4031 4032 /*@ 4033 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4034 (the default parallel PETSc format). 4035 4036 Collective 4037 4038 Input Parameters: 4039 + B - the matrix 4040 . i - the indices into j for the start of each local row (starts with zero) 4041 . j - the column indices for each local row (starts with zero) 4042 - v - optional values in the matrix 4043 4044 Level: developer 4045 4046 Notes: 4047 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 4048 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4049 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4050 4051 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4052 4053 The format which is used for the sparse matrix input, is equivalent to a 4054 row-major ordering.. i.e for the following matrix, the input data expected is 4055 as shown 4056 4057 $ 1 0 0 4058 $ 2 0 3 P0 4059 $ ------- 4060 $ 4 5 6 P1 4061 $ 4062 $ Process0 [P0]: rows_owned=[0,1] 4063 $ i = {0,1,3} [size = nrow+1 = 2+1] 4064 $ j = {0,0,2} [size = 3] 4065 $ v = {1,2,3} [size = 3] 4066 $ 4067 $ Process1 [P1]: rows_owned=[2] 4068 $ i = {0,3} [size = nrow+1 = 1+1] 4069 $ j = {0,1,2} [size = 3] 4070 $ v = {4,5,6} [size = 3] 4071 4072 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4073 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4074 @*/ 4075 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4076 { 4077 PetscErrorCode ierr; 4078 4079 PetscFunctionBegin; 4080 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4081 PetscFunctionReturn(0); 4082 } 4083 4084 /*@C 4085 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4086 (the default parallel PETSc format). For good matrix assembly performance 4087 the user should preallocate the matrix storage by setting the parameters 4088 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4089 performance can be increased by more than a factor of 50. 4090 4091 Collective 4092 4093 Input Parameters: 4094 + B - the matrix 4095 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4096 (same value is used for all local rows) 4097 . d_nnz - array containing the number of nonzeros in the various rows of the 4098 DIAGONAL portion of the local submatrix (possibly different for each row) 4099 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4100 The size of this array is equal to the number of local rows, i.e 'm'. 4101 For matrices that will be factored, you must leave room for (and set) 4102 the diagonal entry even if it is zero. 4103 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4104 submatrix (same value is used for all local rows). 4105 - o_nnz - array containing the number of nonzeros in the various rows of the 4106 OFF-DIAGONAL portion of the local submatrix (possibly different for 4107 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4108 structure. The size of this array is equal to the number 4109 of local rows, i.e 'm'. 4110 4111 If the *_nnz parameter is given then the *_nz parameter is ignored 4112 4113 The AIJ format (also called the Yale sparse matrix format or 4114 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4115 storage. The stored row and column indices begin with zero. 4116 See Users-Manual: ch_mat for details. 4117 4118 The parallel matrix is partitioned such that the first m0 rows belong to 4119 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4120 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4121 4122 The DIAGONAL portion of the local submatrix of a processor can be defined 4123 as the submatrix which is obtained by extraction the part corresponding to 4124 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4125 first row that belongs to the processor, r2 is the last row belonging to 4126 the this processor, and c1-c2 is range of indices of the local part of a 4127 vector suitable for applying the matrix to. This is an mxn matrix. In the 4128 common case of a square matrix, the row and column ranges are the same and 4129 the DIAGONAL part is also square. The remaining portion of the local 4130 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4131 4132 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4133 4134 You can call MatGetInfo() to get information on how effective the preallocation was; 4135 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4136 You can also run with the option -info and look for messages with the string 4137 malloc in them to see if additional memory allocation was needed. 4138 4139 Example usage: 4140 4141 Consider the following 8x8 matrix with 34 non-zero values, that is 4142 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4143 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4144 as follows: 4145 4146 .vb 4147 1 2 0 | 0 3 0 | 0 4 4148 Proc0 0 5 6 | 7 0 0 | 8 0 4149 9 0 10 | 11 0 0 | 12 0 4150 ------------------------------------- 4151 13 0 14 | 15 16 17 | 0 0 4152 Proc1 0 18 0 | 19 20 21 | 0 0 4153 0 0 0 | 22 23 0 | 24 0 4154 ------------------------------------- 4155 Proc2 25 26 27 | 0 0 28 | 29 0 4156 30 0 0 | 31 32 33 | 0 34 4157 .ve 4158 4159 This can be represented as a collection of submatrices as: 4160 4161 .vb 4162 A B C 4163 D E F 4164 G H I 4165 .ve 4166 4167 Where the submatrices A,B,C are owned by proc0, D,E,F are 4168 owned by proc1, G,H,I are owned by proc2. 4169 4170 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4171 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4172 The 'M','N' parameters are 8,8, and have the same values on all procs. 4173 4174 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4175 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4176 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4177 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4178 part as SeqAIJ matrices. 
   For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
   rows in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering,
i.e for the following matrix, the input data expected is 4256 as shown 4257 4258 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4259 4260 $ 1 0 0 4261 $ 2 0 3 P0 4262 $ ------- 4263 $ 4 5 6 P1 4264 $ 4265 $ Process0 [P0]: rows_owned=[0,1] 4266 $ i = {0,1,3} [size = nrow+1 = 2+1] 4267 $ j = {0,0,2} [size = 3] 4268 $ v = {1,2,3} [size = 3] 4269 $ 4270 $ Process1 [P1]: rows_owned=[2] 4271 $ i = {0,3} [size = nrow+1 = 1+1] 4272 $ j = {0,1,2} [size = 3] 4273 $ v = {4,5,6} [size = 3] 4274 4275 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4276 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4277 @*/ 4278 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4279 { 4280 PetscErrorCode ierr; 4281 4282 PetscFunctionBegin; 4283 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4284 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4285 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4286 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4287 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4288 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4289 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4290 PetscFunctionReturn(0); 4291 } 4292 4293 /*@ 4294 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4295 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4296 4297 Collective 4298 4299 Input Parameters: 4300 + mat - the matrix 4301 . m - number of local rows (Cannot be PETSC_DECIDE) 4302 . n - This value should be the same as the local size used in creating the 4303 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4304 calculated if N is given) For square matrices n is almost always m. 4305 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4306 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4307 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4308 . 
J - column indices
-  v - matrix values

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscErrorCode ierr;
  PetscInt       cstart,nnz,i,j;
  PetscInt       *ld;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data;
  PetscScalar    *ad  = Ad->a, *ao = Ao->a;
  const PetscInt *Adi = Ad->i;
  PetscInt       ldi,Iii,md;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  cstart = mat->cmap->rstart;
  if (!Aij->ld) {
    /* count the number of entries below the block diagonal in each local row */
    ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
    Aij->ld = ld;
    for (i=0; i<m; i++) {
      nnz = Ii[i+1] - Ii[i];
      j   = 0;
      while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz before reading J[j] so we never read past the end of the row */
      J    += nnz;
      ld[i] = j;
    }
  } else {
    ld = Aij->ld;
  }

  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i+1] - Adi[i];
    ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
    ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
    ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
    ad  += md;
    ao  += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4390 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4391 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4392 (same value is used for all local rows) 4393 . d_nnz - array containing the number of nonzeros in the various rows of the 4394 DIAGONAL portion of the local submatrix (possibly different for each row) 4395 or NULL, if d_nz is used to specify the nonzero structure. 4396 The size of this array is equal to the number of local rows, i.e 'm'. 4397 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4398 submatrix (same value is used for all local rows). 4399 - o_nnz - array containing the number of nonzeros in the various rows of the 4400 OFF-DIAGONAL portion of the local submatrix (possibly different for 4401 each row) or NULL, if o_nz is used to specify the nonzero 4402 structure. The size of this array is equal to the number 4403 of local rows, i.e 'm'. 4404 4405 Output Parameter: 4406 . A - the matrix 4407 4408 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4409 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4410 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4411 4412 Notes: 4413 If the *_nnz parameter is given then the *_nz parameter is ignored 4414 4415 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4416 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4417 storage requirements for this matrix. 4418 4419 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4420 processor than it must be used on all processors that share the object for 4421 that argument. 4422 4423 The user MUST specify either the local or global matrix dimensions 4424 (possibly both). 4425 4426 The parallel matrix is partitioned across processors such that the 4427 first m0 rows belong to process 0, the next m1 rows belong to 4428 process 1, the next m2 rows belong to process 2 etc.. where 4429 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4430 values corresponding to [m x N] submatrix. 4431 4432 The columns are logically partitioned with the n0 columns belonging 4433 to 0th partition, the next n1 columns belonging to the next 4434 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4435 4436 The DIAGONAL portion of the local submatrix on any given processor 4437 is the submatrix corresponding to the rows and columns m,n 4438 corresponding to the given processor. i.e diagonal matrix on 4439 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4440 etc. The remaining portion of the local submatrix [m x (N-n)] 4441 constitute the OFF-DIAGONAL portion. The example below better 4442 illustrates this concept. 4443 4444 For a square global matrix we define each processor's diagonal portion 4445 to be its local rows and the corresponding columns (a square submatrix); 4446 each processor's off-diagonal portion encompasses the remainder of the 4447 local matrix (a rectangular submatrix). 4448 4449 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4450 4451 When calling this routine with a single process communicator, a matrix of 4452 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
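   As a rough sketch of the usual calling sequence with per-row preallocation (the
   local sizes and the per-row counts below are placeholders that an application
   computes from its own connectivity data; they are not produced by this routine):
.vb
     Mat       A;
     PetscInt  i,m = ...,n = ...,*d_nnz,*o_nnz;

     ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
     for (i=0; i<m; i++) {
       d_nnz[i] = ...;   /* nonzeros of local row i that fall in the local (diagonal) column block */
       o_nnz[i] = ...;   /* nonzeros of local row i that fall outside the local column block */
     }
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
     /* ... MatSetValues() on the locally owned rows ... */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve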

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

   Not Collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
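   For illustration, a small sketch of walking one row of the off-diagonal block and
   recovering global column numbers through colmap (the row number 0 and the variable
   names are only for the example):
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap,*cols;
     PetscInt       j,ncols;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
     ierr = MatGetRow(Ao,0,&ncols,&cols,NULL);CHKERRQ(ierr);   /* first local row of Ao */
     for (j=0; j<ncols; j++) {
       PetscInt gcol = colmap[cols[j]];                        /* global column in the parallel matrix */
       /* ... use gcol ... */
     }
     ierr = MatRestoreRow(Ao,0,&ncols,&cols,NULL);CHKERRQ(ierr);
.ve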
4583 4584 Level: intermediate 4585 4586 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4587 @*/ 4588 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4589 { 4590 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4591 PetscBool flg; 4592 PetscErrorCode ierr; 4593 4594 PetscFunctionBegin; 4595 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4596 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4597 if (Ad) *Ad = a->A; 4598 if (Ao) *Ao = a->B; 4599 if (colmap) *colmap = a->garray; 4600 PetscFunctionReturn(0); 4601 } 4602 4603 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4604 { 4605 PetscErrorCode ierr; 4606 PetscInt m,N,i,rstart,nnz,Ii; 4607 PetscInt *indx; 4608 PetscScalar *values; 4609 4610 PetscFunctionBegin; 4611 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4612 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4613 PetscInt *dnz,*onz,sum,bs,cbs; 4614 4615 if (n == PETSC_DECIDE) { 4616 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4617 } 4618 /* Check sum(n) = N */ 4619 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4620 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4621 4622 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4623 rstart -= m; 4624 4625 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4626 for (i=0; i<m; i++) { 4627 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4628 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4629 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4630 } 4631 4632 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4633 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4634 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4635 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4636 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4637 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4638 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4639 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4640 } 4641 4642 /* numeric phase */ 4643 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4644 for (i=0; i<m; i++) { 4645 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4646 Ii = i + rstart; 4647 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4648 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4649 } 4650 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4651 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4652 PetscFunctionReturn(0); 4653 } 4654 4655 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4656 { 4657 PetscErrorCode ierr; 4658 PetscMPIInt rank; 4659 PetscInt m,N,i,rstart,nnz; 4660 size_t len; 4661 const PetscInt *indx; 4662 PetscViewer out; 4663 char *name; 4664 Mat B; 4665 const PetscScalar *values; 4666 4667 PetscFunctionBegin; 4668 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4669 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4670 /* Should this be the type of the diagonal block of A? 
*/ 4671 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4672 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4673 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4674 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4675 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4676 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4677 for (i=0; i<m; i++) { 4678 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4679 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4680 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4681 } 4682 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4683 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4684 4685 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4686 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4687 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4688 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4689 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4690 ierr = PetscFree(name);CHKERRQ(ierr); 4691 ierr = MatView(B,out);CHKERRQ(ierr); 4692 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4693 ierr = MatDestroy(&B);CHKERRQ(ierr); 4694 PetscFunctionReturn(0); 4695 } 4696 4697 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4698 { 4699 PetscErrorCode ierr; 4700 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4701 4702 PetscFunctionBegin; 4703 if (!merge) PetscFunctionReturn(0); 4704 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4705 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4706 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4707 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4708 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4709 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4710 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4711 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4712 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4713 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4714 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4715 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4716 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4717 ierr = PetscFree(merge);CHKERRQ(ierr); 4718 PetscFunctionReturn(0); 4719 } 4720 4721 #include <../src/mat/utils/freespace.h> 4722 #include <petscbt.h> 4723 4724 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4725 { 4726 PetscErrorCode ierr; 4727 MPI_Comm comm; 4728 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4729 PetscMPIInt size,rank,taga,*len_s; 4730 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4731 PetscInt proc,m; 4732 PetscInt **buf_ri,**buf_rj; 4733 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4734 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4735 MPI_Request *s_waits,*r_waits; 4736 MPI_Status *status; 4737 MatScalar *aa=a->a; 4738 MatScalar **abuf_r,*ba_i; 4739 Mat_Merge_SeqsToMPI *merge; 4740 PetscContainer container; 4741 4742 PetscFunctionBegin; 4743 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4744 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4745 4746 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4747 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4748 4749 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4750 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4751 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4752 4753 bi = merge->bi; 4754 bj = merge->bj; 4755 buf_ri = merge->buf_ri; 4756 buf_rj = merge->buf_rj; 4757 4758 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4759 owners = merge->rowmap->range; 4760 len_s = merge->len_s; 4761 4762 /* send and recv matrix values */ 4763 /*-----------------------------*/ 4764 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4765 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4766 4767 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4768 for (proc=0,k=0; proc<size; proc++) { 4769 if (!len_s[proc]) continue; 4770 i = owners[proc]; 4771 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4772 k++; 4773 } 4774 4775 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4776 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4777 ierr = PetscFree(status);CHKERRQ(ierr); 4778 4779 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4780 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4781 4782 /* insert mat values of mpimat */ 4783 /*----------------------------*/ 4784 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4785 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4786 4787 for (k=0; k<merge->nrecv; k++) { 4788 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4789 nrows = *(buf_ri_k[k]); 4790 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4791 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4792 } 4793 4794 /* set values of ba */ 4795 m = merge->rowmap->n; 4796 for (i=0; i<m; i++) { 4797 arow = owners[rank] + i; 4798 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4799 bnzi = bi[i+1] - bi[i]; 4800 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4801 4802 /* add local non-zero vals of this proc's seqmat into ba */ 4803 anzi = ai[arow+1] - ai[arow]; 4804 aj = a->j + ai[arow]; 4805 aa = a->a + ai[arow]; 4806 nextaj = 0; 4807 for (j=0; nextaj<anzi; j++) { 4808 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4809 ba_i[j] += aa[nextaj++]; 4810 } 4811 } 4812 4813 /* add received vals into ba */ 4814 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4815 /* i-th row */ 4816 if (i == *nextrow[k]) { 4817 anzi = *(nextai[k]+1) - *nextai[k]; 4818 aj = buf_rj[k] + *(nextai[k]); 4819 aa = abuf_r[k] + *(nextai[k]); 4820 nextaj = 0; 4821 for (j=0; nextaj<anzi; j++) { 4822 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4823 ba_i[j] += aa[nextaj++]; 4824 } 4825 } 4826 nextrow[k]++; nextai[k]++; 4827 } 4828 } 4829 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4830 } 4831 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4832 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4833 4834 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4835 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4836 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4837 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4838 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4839 PetscFunctionReturn(0); 4840 } 4841 4842 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4843 { 4844 PetscErrorCode ierr; 4845 Mat B_mpi; 4846 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4847 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4848 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4849 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4850 PetscInt len,proc,*dnz,*onz,bs,cbs; 4851 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4852 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4853 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4854 MPI_Status *status; 4855 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4856 PetscBT lnkbt; 4857 Mat_Merge_SeqsToMPI *merge; 4858 PetscContainer container; 4859 4860 PetscFunctionBegin; 4861 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4862 4863 /* make sure it is a PETSc comm */ 4864 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4865 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4866 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4867 4868 ierr = PetscNew(&merge);CHKERRQ(ierr); 4869 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4870 4871 /* determine row ownership */ 4872 /*---------------------------------------------------------*/ 4873 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4874 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4875 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4876 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4877 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4878 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4879 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4880 4881 m = merge->rowmap->n; 4882 owners = merge->rowmap->range; 4883 4884 /* determine the number of messages to send, their lengths */ 4885 /*---------------------------------------------------------*/ 4886 len_s = merge->len_s; 4887 4888 len = 0; /* length of buf_si[] */ 4889 merge->nsend = 0; 4890 for (proc=0; proc<size; proc++) { 4891 len_si[proc] = 0; 4892 if (proc == rank) { 4893 len_s[proc] = 0; 4894 } else { 4895 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4896 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4897 } 4898 if (len_s[proc]) { 4899 merge->nsend++; 4900 nrows = 0; 4901 for (i=owners[proc]; i<owners[proc+1]; i++) { 4902 if (ai[i+1] > ai[i]) nrows++; 4903 } 4904 len_si[proc] = 2*(nrows+1); 4905 len += len_si[proc]; 4906 } 4907 } 4908 4909 /* determine the number and length of messages to receive for ij-structure */ 4910 /*-------------------------------------------------------------------------*/ 4911 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4912 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4913 4914 /* post the Irecv of j-structure */ 4915 /*-------------------------------*/ 4916 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4917 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4918 4919 /* post the Isend of j-structure */ 4920 /*--------------------------------*/ 4921 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4922 4923 for (proc=0, k=0; proc<size; proc++) { 4924 if (!len_s[proc]) continue; 4925 i = owners[proc]; 4926 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4927 k++; 4928 } 4929 4930 /* receives and sends of j-structure are complete */ 4931 /*------------------------------------------------*/ 
4932 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4933 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4934 4935 /* send and recv i-structure */ 4936 /*---------------------------*/ 4937 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4938 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4939 4940 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4941 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4942 for (proc=0,k=0; proc<size; proc++) { 4943 if (!len_s[proc]) continue; 4944 /* form outgoing message for i-structure: 4945 buf_si[0]: nrows to be sent 4946 [1:nrows]: row index (global) 4947 [nrows+1:2*nrows+1]: i-structure index 4948 */ 4949 /*-------------------------------------------*/ 4950 nrows = len_si[proc]/2 - 1; 4951 buf_si_i = buf_si + nrows+1; 4952 buf_si[0] = nrows; 4953 buf_si_i[0] = 0; 4954 nrows = 0; 4955 for (i=owners[proc]; i<owners[proc+1]; i++) { 4956 anzi = ai[i+1] - ai[i]; 4957 if (anzi) { 4958 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4959 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4960 nrows++; 4961 } 4962 } 4963 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4964 k++; 4965 buf_si += len_si[proc]; 4966 } 4967 4968 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4969 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4970 4971 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4972 for (i=0; i<merge->nrecv; i++) { 4973 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4974 } 4975 4976 ierr = PetscFree(len_si);CHKERRQ(ierr); 4977 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4978 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4979 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4980 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4981 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4982 ierr = PetscFree(status);CHKERRQ(ierr); 4983 4984 /* compute a local seq matrix in each processor */ 4985 /*----------------------------------------------*/ 4986 /* allocate bi array and free space for accumulating nonzero column info */ 4987 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4988 bi[0] = 0; 4989 4990 /* create and initialize a linked list */ 4991 nlnk = N+1; 4992 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4993 4994 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4995 len = ai[owners[rank+1]] - ai[owners[rank]]; 4996 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4997 4998 current_space = free_space; 4999 5000 /* determine symbolic info for each local row */ 5001 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 5002 5003 for (k=0; k<merge->nrecv; k++) { 5004 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5005 nrows = *buf_ri_k[k]; 5006 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5007 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 5008 } 5009 5010 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5011 len = 0; 5012 for (i=0; i<m; i++) { 5013 bnzi = 0; 5014 /* add local non-zero cols of this proc's seqmat into lnk */ 5015 arow = owners[rank] + i; 5016 anzi = 
ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
      matrices from each processor

   Collective

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  seqmat - the input sequential matrix on each process
.  m - number of local rows (or PETSC_DECIDE)
.
n - number of local columns (or PETSC_DECIDE) 5104 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5105 5106 Output Parameter: 5107 . mpimat - the parallel matrix generated 5108 5109 Level: advanced 5110 5111 Notes: 5112 The dimensions of the sequential matrix in each processor MUST be the same. 5113 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5114 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5115 @*/ 5116 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5117 { 5118 PetscErrorCode ierr; 5119 PetscMPIInt size; 5120 5121 PetscFunctionBegin; 5122 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5123 if (size == 1) { 5124 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5125 if (scall == MAT_INITIAL_MATRIX) { 5126 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5127 } else { 5128 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5129 } 5130 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5131 PetscFunctionReturn(0); 5132 } 5133 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5134 if (scall == MAT_INITIAL_MATRIX) { 5135 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5136 } 5137 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5138 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5139 PetscFunctionReturn(0); 5140 } 5141 5142 /*@ 5143 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5144 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5145 with MatGetSize() 5146 5147 Not Collective 5148 5149 Input Parameters: 5150 + A - the matrix 5151 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5152 5153 Output Parameter: 5154 . A_loc - the local sequential matrix generated 5155 5156 Level: developer 5157 5158 Notes: 5159 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5160 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5161 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5162 modify the values of the returned A_loc. 
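   A sketch of the intended reuse pattern (assuming the nonzero pattern of A does not
   change between the two calls):
.vb
     Mat A_loc;

     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     /* after the numerical values of A have been updated */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use the refreshed A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve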
5163 5164 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5165 5166 @*/ 5167 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5168 { 5169 PetscErrorCode ierr; 5170 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5171 Mat_SeqAIJ *mat,*a,*b; 5172 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5173 MatScalar *aa,*ba,*cam; 5174 PetscScalar *ca; 5175 PetscMPIInt size; 5176 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5177 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5178 PetscBool match; 5179 5180 PetscFunctionBegin; 5181 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5182 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5183 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 5184 if (size == 1) { 5185 if (scall == MAT_INITIAL_MATRIX) { 5186 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5187 *A_loc = mpimat->A; 5188 } else if (scall == MAT_REUSE_MATRIX) { 5189 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5190 } 5191 PetscFunctionReturn(0); 5192 } 5193 5194 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5195 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5196 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5197 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5198 aa = a->a; ba = b->a; 5199 if (scall == MAT_INITIAL_MATRIX) { 5200 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5201 ci[0] = 0; 5202 for (i=0; i<am; i++) { 5203 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5204 } 5205 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5206 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5207 k = 0; 5208 for (i=0; i<am; i++) { 5209 ncols_o = bi[i+1] - bi[i]; 5210 ncols_d = ai[i+1] - ai[i]; 5211 /* off-diagonal portion of A */ 5212 for (jo=0; jo<ncols_o; jo++) { 5213 col = cmap[*bj]; 5214 if (col >= cstart) break; 5215 cj[k] = col; bj++; 5216 ca[k++] = *ba++; 5217 } 5218 /* diagonal portion of A */ 5219 for (j=0; j<ncols_d; j++) { 5220 cj[k] = cstart + *aj++; 5221 ca[k++] = *aa++; 5222 } 5223 /* off-diagonal portion of A */ 5224 for (j=jo; j<ncols_o; j++) { 5225 cj[k] = cmap[*bj++]; 5226 ca[k++] = *ba++; 5227 } 5228 } 5229 /* put together the new matrix */ 5230 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5231 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5232 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5233 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5234 mat->free_a = PETSC_TRUE; 5235 mat->free_ij = PETSC_TRUE; 5236 mat->nonew = 0; 5237 } else if (scall == MAT_REUSE_MATRIX) { 5238 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5239 ci = mat->i; cj = mat->j; cam = mat->a; 5240 for (i=0; i<am; i++) { 5241 /* off-diagonal portion of A */ 5242 ncols_o = bi[i+1] - bi[i]; 5243 for (jo=0; jo<ncols_o; jo++) { 5244 col = cmap[*bj]; 5245 if (col >= cstart) break; 5246 *cam++ = *ba++; bj++; 5247 } 5248 /* diagonal portion of A */ 5249 ncols_d = ai[i+1] - ai[i]; 5250 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5251 /* off-diagonal portion of A */ 5252 for (j=jo; j<ncols_o; j++) { 5253 *cam++ = *ba++; bj++; 5254 } 5255 } 5256 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5257 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5258 PetscFunctionReturn(0); 5259 } 5260 5261 /*@C 5262 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5263 5264 Not Collective 5265 5266 Input Parameters: 5267 + A - the matrix 5268 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5269 - row, col - index sets of rows and columns to extract (or NULL) 5270 5271 Output Parameter: 5272 . A_loc - the local sequential matrix generated 5273 5274 Level: developer 5275 5276 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5277 5278 @*/ 5279 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5280 { 5281 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5282 PetscErrorCode ierr; 5283 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5284 IS isrowa,iscola; 5285 Mat *aloc; 5286 PetscBool match; 5287 5288 PetscFunctionBegin; 5289 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5290 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5291 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5292 if (!row) { 5293 start = A->rmap->rstart; end = A->rmap->rend; 5294 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5295 } else { 5296 isrowa = *row; 5297 } 5298 if (!col) { 5299 start = A->cmap->rstart; 5300 cmap = a->garray; 5301 nzA = a->A->cmap->n; 5302 nzB = a->B->cmap->n; 5303 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5304 ncols = 0; 5305 for (i=0; i<nzB; i++) { 5306 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5307 else break; 5308 } 5309 imark = i; 5310 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5311 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5312 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5313 } else { 5314 iscola = *col; 5315 } 5316 if (scall != MAT_INITIAL_MATRIX) { 5317 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5318 aloc[0] = *A_loc; 5319 } 5320 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5321 if (!col) { /* attach global id of condensed columns */ 5322 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5323 } 5324 *A_loc = aloc[0]; 5325 ierr = PetscFree(aloc);CHKERRQ(ierr); 5326 if (!row) { 5327 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5328 } 5329 if (!col) { 5330 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5331 } 5332 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5333 PetscFunctionReturn(0); 5334 } 5335 5336 /* 5337 * Create a sequential AIJ matrix 
based on row indices. a whole column is extracted once a row is matched. 5338 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5339 * on a global size. 5340 * */ 5341 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5342 { 5343 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5344 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5345 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5346 PetscMPIInt owner; 5347 PetscSFNode *iremote,*oiremote; 5348 const PetscInt *lrowindices; 5349 PetscErrorCode ierr; 5350 PetscSF sf,osf; 5351 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5352 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5353 MPI_Comm comm; 5354 ISLocalToGlobalMapping mapping; 5355 5356 PetscFunctionBegin; 5357 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5358 /* plocalsize is the number of roots 5359 * nrows is the number of leaves 5360 * */ 5361 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5362 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5363 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5364 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5365 for (i=0;i<nrows;i++) { 5366 /* Find a remote index and an owner for a row 5367 * The row could be local or remote 5368 * */ 5369 owner = 0; 5370 lidx = 0; 5371 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5372 iremote[i].index = lidx; 5373 iremote[i].rank = owner; 5374 } 5375 /* Create SF to communicate how many nonzero columns for each row */ 5376 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5377 /* SF will figure out the number of nonzero colunms for each row, and their 5378 * offsets 5379 * */ 5380 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5381 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5382 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5383 5384 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5385 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5386 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5387 roffsets[0] = 0; 5388 roffsets[1] = 0; 5389 for (i=0;i<plocalsize;i++) { 5390 /* diag */ 5391 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5392 /* off diag */ 5393 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5394 /* compute offsets so that we relative location for each row */ 5395 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5396 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5397 } 5398 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5399 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5400 /* 'r' means root, and 'l' means leaf */ 5401 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5402 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5403 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5404 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5405 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5406 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5407 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5408 dntotalcols = 0; 5409 ontotalcols = 0; 5410 ncol = 0; 5411 for (i=0;i<nrows;i++) { 5412 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5413 ncol = PetscMax(pnnz[i],ncol); 5414 /* diag */ 5415 dntotalcols += nlcols[i*2+0]; 5416 /* off diag */ 5417 ontotalcols += nlcols[i*2+1]; 5418 } 5419 /* We do not need to figure the right number of columns 5420 * since all the 
calculations will be done by going through the raw data 5421 * */ 5422 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5423 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5424 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5425 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5426 /* diag */ 5427 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5428 /* off diag */ 5429 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5430 /* diag */ 5431 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5432 /* off diag */ 5433 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5434 dntotalcols = 0; 5435 ontotalcols = 0; 5436 ntotalcols = 0; 5437 for (i=0;i<nrows;i++) { 5438 owner = 0; 5439 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5440 /* Set iremote for diag matrix */ 5441 for (j=0;j<nlcols[i*2+0];j++) { 5442 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5443 iremote[dntotalcols].rank = owner; 5444 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5445 ilocal[dntotalcols++] = ntotalcols++; 5446 } 5447 /* off diag */ 5448 for (j=0;j<nlcols[i*2+1];j++) { 5449 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5450 oiremote[ontotalcols].rank = owner; 5451 oilocal[ontotalcols++] = ntotalcols++; 5452 } 5453 } 5454 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5455 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5456 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5457 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5458 /* P serves as roots and P_oth is leaves 5459 * Diag matrix 5460 * */ 5461 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5462 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5463 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5464 5465 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5466 /* Off diag */ 5467 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5468 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5469 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5470 /* We operate on the matrix internal data for saving memory */ 5471 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5472 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5473 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5474 /* Convert to global indices for diag matrix */ 5475 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5476 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5477 /* We want P_oth store global indices */ 5478 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5479 /* Use memory scalable approach */ 5480 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5481 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5482 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5483 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5484 /* Convert back to local indices */ 5485 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5486 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5487 nout = 0; 5488 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5489 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D 
\n",po->i[plocalsize],nout); 5490 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5491 /* Exchange values */ 5492 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5493 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5494 /* Stop PETSc from shrinking memory */ 5495 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5496 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5497 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5498 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5499 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5500 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5501 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5502 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5503 PetscFunctionReturn(0); 5504 } 5505 5506 /* 5507 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5508 * This supports MPIAIJ and MAIJ 5509 * */ 5510 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5511 { 5512 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5513 Mat_SeqAIJ *p_oth; 5514 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5515 IS rows,map; 5516 PetscHMapI hamp; 5517 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5518 MPI_Comm comm; 5519 PetscSF sf,osf; 5520 PetscBool has; 5521 PetscErrorCode ierr; 5522 5523 PetscFunctionBegin; 5524 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5525 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5526 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5527 * and then create a submatrix (that often is an overlapping matrix) 5528 * */ 5529 if (reuse == MAT_INITIAL_MATRIX) { 5530 /* Use a hash table to figure out unique keys */ 5531 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5532 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5533 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5534 count = 0; 5535 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5536 for (i=0;i<a->B->cmap->n;i++) { 5537 key = a->garray[i]/dof; 5538 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5539 if (!has) { 5540 mapping[i] = count; 5541 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5542 } else { 5543 /* Current 'i' has the same value the previous step */ 5544 mapping[i] = count-1; 5545 } 5546 } 5547 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5548 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5549 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5550 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5551 off = 0; 5552 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5553 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5554 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5555 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5556 /* In case, the matrix was already created but users want to recreate the matrix */ 5557 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5558 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5559 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5560 ierr = 
ISDestroy(&map);CHKERRQ(ierr); 5561 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5562 } else if (reuse == MAT_REUSE_MATRIX) { 5563 /* If matrix was already created, we simply update values using SF objects 5564 * that as attached to the matrix ealier. 5565 * */ 5566 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5567 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5568 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5569 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5570 /* Update values in place */ 5571 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5572 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5573 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5574 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5575 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5576 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5577 PetscFunctionReturn(0); 5578 } 5579 5580 /*@C 5581 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5582 5583 Collective on Mat 5584 5585 Input Parameters: 5586 + A,B - the matrices in mpiaij format 5587 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5588 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5589 5590 Output Parameter: 5591 + rowb, colb - index sets of rows and columns of B to extract 5592 - B_seq - the sequential matrix generated 5593 5594 Level: developer 5595 5596 @*/ 5597 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5598 { 5599 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5600 PetscErrorCode ierr; 5601 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5602 IS isrowb,iscolb; 5603 Mat *bseq=NULL; 5604 5605 PetscFunctionBegin; 5606 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5607 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5608 } 5609 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5610 5611 if (scall == MAT_INITIAL_MATRIX) { 5612 start = A->cmap->rstart; 5613 cmap = a->garray; 5614 nzA = a->A->cmap->n; 5615 nzB = a->B->cmap->n; 5616 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5617 ncols = 0; 5618 for (i=0; i<nzB; i++) { /* row < local row index */ 5619 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5620 else break; 5621 } 5622 imark = i; 5623 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5624 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5625 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5626 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5627 } else { 5628 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5629 isrowb = *rowb; iscolb = *colb; 5630 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5631 bseq[0] = *B_seq; 5632 } 5633 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5634 *B_seq = bseq[0]; 5635 ierr = PetscFree(bseq);CHKERRQ(ierr); 5636 if (!rowb) { 5637 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5638 } else { 5639 *rowb = isrowb; 5640 } 5641 if (!colb) { 5642 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5643 } 
else { 5644 *colb = iscolb; 5645 } 5646 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5647 PetscFunctionReturn(0); 5648 } 5649 5650 /* 5651 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5652 of the OFF-DIAGONAL portion of local A 5653 5654 Collective on Mat 5655 5656 Input Parameters: 5657 + A,B - the matrices in mpiaij format 5658 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5659 5660 Output Parameter: 5661 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5662 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5663 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5664 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5665 5666 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5667 for this matrix. This is not desirable.. 5668 5669 Level: developer 5670 5671 */ 5672 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5673 { 5674 PetscErrorCode ierr; 5675 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5676 Mat_SeqAIJ *b_oth; 5677 VecScatter ctx; 5678 MPI_Comm comm; 5679 const PetscMPIInt *rprocs,*sprocs; 5680 const PetscInt *srow,*rstarts,*sstarts; 5681 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5682 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5683 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5684 MPI_Request *rwaits = NULL,*swaits = NULL; 5685 MPI_Status rstatus; 5686 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5687 5688 PetscFunctionBegin; 5689 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5690 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5691 5692 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5693 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5694 } 5695 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5696 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5697 5698 if (size == 1) { 5699 startsj_s = NULL; 5700 bufa_ptr = NULL; 5701 *B_oth = NULL; 5702 PetscFunctionReturn(0); 5703 } 5704 5705 ctx = a->Mvctx; 5706 tag = ((PetscObject)ctx)->tag; 5707 5708 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5709 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5710 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5711 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5712 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5713 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5714 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5715 5716 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5717 if (scall == MAT_INITIAL_MATRIX) { 5718 /* i-array */ 5719 /*---------*/ 5720 /* post receives */ 5721 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be 
NULL when nrecvs=0 */ 5722 for (i=0; i<nrecvs; i++) { 5723 rowlen = rvalues + rstarts[i]*rbs; 5724 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5725 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5726 } 5727 5728 /* pack the outgoing message */ 5729 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5730 5731 sstartsj[0] = 0; 5732 rstartsj[0] = 0; 5733 len = 0; /* total length of j or a array to be sent */ 5734 if (nsends) { 5735 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5736 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5737 } 5738 for (i=0; i<nsends; i++) { 5739 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5740 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5741 for (j=0; j<nrows; j++) { 5742 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5743 for (l=0; l<sbs; l++) { 5744 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5745 5746 rowlen[j*sbs+l] = ncols; 5747 5748 len += ncols; 5749 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5750 } 5751 k++; 5752 } 5753 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5754 5755 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5756 } 5757 /* recvs and sends of i-array are completed */ 5758 i = nrecvs; 5759 while (i--) { 5760 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5761 } 5762 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5763 ierr = PetscFree(svalues);CHKERRQ(ierr); 5764 5765 /* allocate buffers for sending j and a arrays */ 5766 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5767 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5768 5769 /* create i-array of B_oth */ 5770 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5771 5772 b_othi[0] = 0; 5773 len = 0; /* total length of j or a array to be received */ 5774 k = 0; 5775 for (i=0; i<nrecvs; i++) { 5776 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5777 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5778 for (j=0; j<nrows; j++) { 5779 b_othi[k+1] = b_othi[k] + rowlen[j]; 5780 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5781 k++; 5782 } 5783 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5784 } 5785 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5786 5787 /* allocate space for j and a arrrays of B_oth */ 5788 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5789 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5790 5791 /* j-array */ 5792 /*---------*/ 5793 /* post receives of j-array */ 5794 for (i=0; i<nrecvs; i++) { 5795 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5796 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5797 } 5798 5799 /* pack the outgoing message j-array */ 5800 if (nsends) k = sstarts[0]; 5801 for (i=0; i<nsends; i++) { 5802 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5803 bufJ = bufj+sstartsj[i]; 5804 for (j=0; j<nrows; j++) { 5805 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5806 for (ll=0; ll<sbs; ll++) { 5807 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5808 for (l=0; l<ncols; l++) { 5809 *bufJ++ = cols[l]; 5810 } 5811 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 
5812 } 5813 } 5814 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5815 } 5816 5817 /* recvs and sends of j-array are completed */ 5818 i = nrecvs; 5819 while (i--) { 5820 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5821 } 5822 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5823 } else if (scall == MAT_REUSE_MATRIX) { 5824 sstartsj = *startsj_s; 5825 rstartsj = *startsj_r; 5826 bufa = *bufa_ptr; 5827 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5828 b_otha = b_oth->a; 5829 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5830 5831 /* a-array */ 5832 /*---------*/ 5833 /* post receives of a-array */ 5834 for (i=0; i<nrecvs; i++) { 5835 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5836 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5837 } 5838 5839 /* pack the outgoing message a-array */ 5840 if (nsends) k = sstarts[0]; 5841 for (i=0; i<nsends; i++) { 5842 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5843 bufA = bufa+sstartsj[i]; 5844 for (j=0; j<nrows; j++) { 5845 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5846 for (ll=0; ll<sbs; ll++) { 5847 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5848 for (l=0; l<ncols; l++) { 5849 *bufA++ = vals[l]; 5850 } 5851 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5852 } 5853 } 5854 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5855 } 5856 /* recvs and sends of a-array are completed */ 5857 i = nrecvs; 5858 while (i--) { 5859 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5860 } 5861 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5862 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5863 5864 if (scall == MAT_INITIAL_MATRIX) { 5865 /* put together the new matrix */ 5866 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5867 5868 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5869 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5870 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5871 b_oth->free_a = PETSC_TRUE; 5872 b_oth->free_ij = PETSC_TRUE; 5873 b_oth->nonew = 0; 5874 5875 ierr = PetscFree(bufj);CHKERRQ(ierr); 5876 if (!startsj_s || !bufa_ptr) { 5877 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5878 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5879 } else { 5880 *startsj_s = sstartsj; 5881 *startsj_r = rstartsj; 5882 *bufa_ptr = bufa; 5883 } 5884 } 5885 5886 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5887 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5888 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5889 PetscFunctionReturn(0); 5890 } 5891 5892 /*@C 5893 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5894 5895 Not Collective 5896 5897 Input Parameters: 5898 . A - The matrix in mpiaij format 5899 5900 Output Parameter: 5901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5902 . 
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
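/*
   Illustrative usage sketch of MatGetCommunicationStructs() (not part of the library): it assumes a
   caller that already has an assembled MATMPIAIJ matrix A and a vector x with the same parallel
   column layout as A. The returned objects are owned by the matrix and must not be destroyed by
   the caller.

     Vec            lvec;
     VecScatter     Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable     colmap;
   #else
     PetscInt       *colmap;
   #endif
     PetscErrorCode ierr;

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
*/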
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_CUDA)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                        p
        [             ]       [             ]       [                 ]
      m [      A      ]  *  n [      B      ]  =  m [        C        ]
        [             ]       [             ]       [                 ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;
  PetscBool      cisdense;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
    MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix.

    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/
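/*
   Illustrative sketch (not part of the library) of the Notes above: A is assumed to be a MATMPIAIJ
   matrix already created by the caller, and row/col global indices owned by this process. With
   MAT_STRUCTURE_ONLY set, passing NULL for the values records only the nonzero pattern, as
   described in the Notes.

     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,1,&col,NULL,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/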
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.
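   Example:
       An illustrative sketch only; the sizes and values are hypothetical. On the first of two processes of a 4x4
       matrix, with 2 rows and 2 columns owned locally, the j indices address the local columns of the "diagonal"
       block while the oj indices are global column indices, matching how the two blocks are handed to
       MatCreateSeqAIJWithArrays() in the implementation below.
.vb
       PetscInt    i[]  = {0,1,2};      rows 0 and 1 of the diagonal block each hold one entry
       PetscInt    j[]  = {0,1};        local column indices of those entries
       PetscScalar a[]  = {1.0,2.0};
       PetscInt    oi[] = {0,1,1};      row 0 of the off-diagonal block holds one entry, row 1 none
       PetscInt    oj[] = {3};          global column index of that entry
       PetscScalar oa[] = {3.0};
       Mat         A;

       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
       The arrays must stay valid until the matrix is destroyed, as noted above.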
.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A  = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B  = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
      if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}