#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
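/*
   A minimal usage sketch of the advice above (illustrative only; the
   preallocation counts here are hypothetical): call both preallocation
   routines so the same code works on one process or many.

.vb
     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);         -> used when the communicator has one process
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);  -> used when it has several
.ve
*/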
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
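/*
   Usage sketch for the routine above (illustrative; not part of this file's build):
   norms must have room for the GLOBAL number of columns on every rank, since the
   partial results are combined with MPIU_Allreduce() over the matrix communicator.

.vb
     PetscInt  N;
     PetscReal *norms;
     MatGetSize(A,NULL,&N);
     PetscMalloc1(N,&norms);
     MatGetColumnNorms(A,NORM_2,norms);   -> collective on the communicator of A
     PetscFree(norms);
.ve
*/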
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
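/*
   The routine above merges two index lists and compresses them with
   PetscSortRemoveDupsInt(). A minimal sketch of that pattern (hypothetical data):

.vb
     PetscInt n = 6, idx[6] = {3, 1, 4, 1, 5, 3};
     PetscSortRemoveDupsInt(&n,idx);   -> on return n == 4 and idx begins {1, 3, 4, 5}
.ve
*/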
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices

  Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRMPI(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRMPI(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRMPI(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRMPI(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRMPI(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRMPI(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRMPI(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRMPI(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRMPI(ierr);
      /* determine the number of diagonal and off-diagonal entries */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRMPI(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRMPI(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr);
      gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRMPI(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
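/*
   Usage sketch for MatDistribute_MPIAIJ() above (illustrative): every rank passes
   its local row count m (the counts must sum to the global size) and the
   sequential matrix gmat, whose numerical content is significant on rank 0.

.vb
     Mat dist;
     MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);
     ...
     MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);   -> moves over new values only
.ve
*/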
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
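/*
   A worked example of the mapping above (hypothetical data): if this rank's
   off-diagonal block has garray = {3, 7, 12}, then global column 7 is local
   column 1 of B. With PETSC_USE_CTABLE the lookup is

.vb
     PetscInt lcol;
     PetscTableFind(aij->colmap,7+1,&lcol);
     lcol--;                                  -> lcol == 1
.ve

   and without it simply lcol = aij->colmap[7] - 1; both encodings reserve 0
   for "not present".
*/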
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops() will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
a_noinsert:; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        inserted = PETSC_TRUE; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
b_noinsert:; \
    bilen[row] = nrow2; \
  }
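/*
   Both macros above locate a column with the same strategy: a truncated binary
   search that narrows the window to at most five entries, finished by a linear
   scan. A standalone sketch of that strategy (assumes cols[] is sorted ascending):

.vb
     PetscInt low = 0, high = n, t, i, found = -1;
     while (high - low > 5) {
       t = (low + high)/2;
       if (cols[t] > col) high = t;
       else               low  = t;
     }
     for (i=low; i<high; i++) {
       if (cols[i] > col) break;
       if (cols[i] == col) { found = i; break; }
     }
.ve
*/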
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
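/*
   Usage sketch for the routine above (illustrative): entries destined for rows
   owned by other ranks are stashed and communicated during assembly, so every
   setting sequence must end with the two assembly calls.

.vb
     MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);   -> row may be off-process
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
*/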
/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A   = aij->A; /* diagonal part of the matrix */
  Mat        B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a  = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b  = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  Mat         A     = aij->A; /* diagonal part of the matrix */
  Mat         B     = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}
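/*
   A worked example of the diagonal/off-diagonal split performed above
   (hypothetical data): with cstart = 4 and cend = 8, the CSR row

.vb
     mat_j = {1, 5, 9}
.ve

   contributes local column 5-4 = 1 to the diagonal block A and global columns
   {1, 9} to the off-diagonal block B.
*/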
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
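/*
   Usage sketch for the routine above (illustrative): only rows owned by the
   calling rank may be queried, so stay within the ownership range.

.vb
     PetscInt    rstart,rend,row,col = 0;
     PetscScalar v;
     MatGetOwnershipRange(A,&rstart,&rend);
     row = rstart;
     MatGetValues(A,1,&row,1,&col,&v);
.ve
*/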
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_DEVICE)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ*)A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch  = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
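/*
   Usage sketch for the routine above (illustrative): the classic Dirichlet
   boundary condition fix-up, zeroing the listed rows, placing 1.0 on their
   diagonal, and adjusting the right hand side b from the boundary values in x.

.vb
     MatZeroRows(A,nbc,bcrows,1.0,x,b);   -> rows may be owned by any rank
.ve
*/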
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
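/*
   The multiply routines above overlap communication with computation: the
   scatter of ghost values is started, the diagonal block is applied while
   messages are in flight, and the off-diagonal block is applied after the
   scatter completes. Schematically:

.vb
     VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
     MatMult(a->A,x,y);                       -> local work hides the communication
     VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
     MatMultAdd(a->B,lvec,y,y);
.ve
*/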
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
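/*
   Usage sketch for MatIsTranspose()/MatIsSymmetric() above (illustrative):
   the internal reduction makes the answer consistent across all ranks.

.vb
     PetscBool flg;
     MatIsTranspose(A,B,1.e-12,&flg);   -> PETSC_TRUE iff B equals A' to tolerance
.ve
*/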
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1312 #endif 1313 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1314 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1320 { 1321 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1322 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1323 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1324 const PetscInt *garray = aij->garray; 1325 const PetscScalar *aa,*ba; 1326 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1327 PetscInt *rowlens; 1328 PetscInt *colidxs; 1329 PetscScalar *matvals; 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1334 1335 M = mat->rmap->N; 1336 N = mat->cmap->N; 1337 m = mat->rmap->n; 1338 rs = mat->rmap->rstart; 1339 cs = mat->cmap->rstart; 1340 nz = A->nz + B->nz; 1341 1342 /* write matrix header */ 1343 header[0] = MAT_FILE_CLASSID; 1344 header[1] = M; header[2] = N; header[3] = nz; 1345 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1346 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1347 1348 /* fill in and store row lengths */ 1349 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1350 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1351 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1352 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1353 1354 /* fill in and store column indices */ 1355 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1356 for (cnt=0, i=0; i<m; i++) { 1357 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1358 if (garray[B->j[jb]] > cs) break; 1359 colidxs[cnt++] = garray[B->j[jb]]; 1360 } 1361 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1362 colidxs[cnt++] = A->j[ja] + cs; 1363 for (; jb<B->i[i+1]; jb++) 1364 colidxs[cnt++] = garray[B->j[jb]]; 1365 } 1366 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1367 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1368 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1369 1370 /* fill in and store nonzero values */ 1371 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1372 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1373 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1374 for (cnt=0, i=0; i<m; i++) { 1375 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1376 if (garray[B->j[jb]] > cs) break; 1377 matvals[cnt++] = ba[jb]; 1378 } 1379 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1380 matvals[cnt++] = aa[ja]; 1381 for (; jb<B->i[i+1]; jb++) 1382 matvals[cnt++] = ba[jb]; 1383 } 1384 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1385 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1386 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1387 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1388 ierr = PetscFree(matvals);CHKERRQ(ierr); 1389 1390 /* write block size option to the viewer's .info file */ 1391 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1392 PetscFunctionReturn(0); 
#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*)(aij->A->data))->nz + ((Mat_SeqAIJ*)(aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax  = PetscMax(nmax,nz[i]);
        nmin  = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
      A  = AA[0];
      Av = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
*/
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1457 } 1458 PetscFunctionReturn(0); 1459 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1460 PetscFunctionReturn(0); 1461 } 1462 } else if (isbinary) { 1463 if (size == 1) { 1464 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1465 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1466 } else { 1467 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1468 } 1469 PetscFunctionReturn(0); 1470 } else if (iascii && size == 1) { 1471 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1472 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } else if (isdraw) { 1475 PetscDraw draw; 1476 PetscBool isnull; 1477 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1478 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1479 if (isnull) PetscFunctionReturn(0); 1480 } 1481 1482 { /* assemble the entire matrix onto first processor */ 1483 Mat A = NULL, Av; 1484 IS isrow,iscol; 1485 1486 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1487 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1488 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1489 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1490 /* The commented code uses MatCreateSubMatrices instead */ 1491 /* 1492 Mat *AA, A = NULL, Av; 1493 IS isrow,iscol; 1494 1495 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1496 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1497 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1498 if (!rank) { 1499 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1500 A = AA[0]; 1501 Av = AA[0]; 1502 } 1503 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1504 */ 1505 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1506 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1507 /* 1508 Everyone has to call to draw the matrix since the graphics waits are 1509 synchronized across all processors that share the PetscDraw object 1510 */ 1511 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1512 if (!rank) { 1513 if (((PetscObject)mat)->name) { 1514 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1515 } 1516 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1517 } 1518 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1519 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1520 ierr = MatDestroy(&A);CHKERRQ(ierr); 1521 } 1522 PetscFunctionReturn(0); 1523 } 1524 1525 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1526 { 1527 PetscErrorCode ierr; 1528 PetscBool iascii,isdraw,issocket,isbinary; 1529 1530 PetscFunctionBegin; 1531 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1532 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1533 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1534 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1535 if (iascii || isdraw || isbinary || issocket) { 1536 ierr = 
MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1537 } 1538 PetscFunctionReturn(0); 1539 } 1540 1541 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1542 { 1543 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1544 PetscErrorCode ierr; 1545 Vec bb1 = NULL; 1546 PetscBool hasop; 1547 1548 PetscFunctionBegin; 1549 if (flag == SOR_APPLY_UPPER) { 1550 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1555 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1556 } 1557 1558 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1559 if (flag & SOR_ZERO_INITIAL_GUESS) { 1560 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1561 its--; 1562 } 1563 1564 while (its--) { 1565 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 1568 /* update rhs: bb1 = bb - B*x */ 1569 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1570 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1571 1572 /* local sweep */ 1573 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1574 } 1575 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1576 if (flag & SOR_ZERO_INITIAL_GUESS) { 1577 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1578 its--; 1579 } 1580 while (its--) { 1581 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1582 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1583 1584 /* update rhs: bb1 = bb - B*x */ 1585 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1586 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1587 1588 /* local sweep */ 1589 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1590 } 1591 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1592 if (flag & SOR_ZERO_INITIAL_GUESS) { 1593 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1594 its--; 1595 } 1596 while (its--) { 1597 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1598 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1599 1600 /* update rhs: bb1 = bb - B*x */ 1601 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1602 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1603 1604 /* local sweep */ 1605 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1606 } 1607 } else if (flag & SOR_EISENSTAT) { 1608 Vec xx1; 1609 1610 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1611 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1612 1613 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1614 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1615 if (!mat->diag) { 1616 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1617 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1618 } 1619 ierr = 
MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1620 if (hasop) { 1621 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1622 } else { 1623 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1624 } 1625 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1626 1627 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1628 1629 /* local sweep */ 1630 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1631 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1632 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1633 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1634 1635 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1636 1637 matin->factorerrortype = mat->A->factorerrortype; 1638 PetscFunctionReturn(0); 1639 } 1640 1641 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1642 { 1643 Mat aA,aB,Aperm; 1644 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1645 PetscScalar *aa,*ba; 1646 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1647 PetscSF rowsf,sf; 1648 IS parcolp = NULL; 1649 PetscBool done; 1650 PetscErrorCode ierr; 1651 1652 PetscFunctionBegin; 1653 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1654 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1655 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1656 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1657 1658 /* Invert row permutation to find out where my rows should go */ 1659 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1660 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1661 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1662 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1663 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1664 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1665 1666 /* Invert column permutation to find out where my columns should go */ 1667 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1668 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1669 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1670 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1671 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1672 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1674 1675 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1676 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1677 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1678 1679 /* Find out where my gcols should go */ 1680 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1681 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1682 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1683 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1684 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1685 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1686 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1687 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1688 1689 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1690 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1691 
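  /* ai/aj (and, from the call below, bi/bj) expose the CSR row pointers and column indices of the
     diagonal block aA and the off-diagonal block aB. The loop that follows counts, for each local
     row, how many entries of the permuted matrix fall in the destination row's diagonal block (dnnz)
     versus its off-diagonal block (onnz); those counts are then broadcast over rowsf to the
     processes that will own the permuted rows. */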
ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1692 for (i=0; i<m; i++) { 1693 PetscInt row = rdest[i]; 1694 PetscMPIInt rowner; 1695 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1696 for (j=ai[i]; j<ai[i+1]; j++) { 1697 PetscInt col = cdest[aj[j]]; 1698 PetscMPIInt cowner; 1699 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1700 if (rowner == cowner) dnnz[i]++; 1701 else onnz[i]++; 1702 } 1703 for (j=bi[i]; j<bi[i+1]; j++) { 1704 PetscInt col = gcdest[bj[j]]; 1705 PetscMPIInt cowner; 1706 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1707 if (rowner == cowner) dnnz[i]++; 1708 else onnz[i]++; 1709 } 1710 } 1711 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1712 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1713 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1714 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1715 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1716 1717 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1718 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1719 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1720 for (i=0; i<m; i++) { 1721 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1722 PetscInt j0,rowlen; 1723 rowlen = ai[i+1] - ai[i]; 1724 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1725 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1726 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1727 } 1728 rowlen = bi[i+1] - bi[i]; 1729 for (j0=j=0; j<rowlen; j0=j) { 1730 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1731 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1732 } 1733 } 1734 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1735 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1736 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1737 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1738 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1739 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1740 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1741 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1742 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1743 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1744 *B = Aperm; 1745 PetscFunctionReturn(0); 1746 } 1747 1748 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1749 { 1750 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1751 PetscErrorCode ierr; 1752 1753 PetscFunctionBegin; 1754 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1755 if (ghosts) *ghosts = aij->garray; 1756 PetscFunctionReturn(0); 1757 } 1758 1759 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1760 { 1761 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1762 Mat A = mat->A,B = mat->B; 1763 PetscErrorCode ierr; 1764 PetscLogDouble isend[5],irecv[5]; 1765 1766 PetscFunctionBegin; 1767 info->block_size = 1.0; 1768 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1769 1770 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = 
info->nz_unneeded; 1771 isend[3] = info->memory; isend[4] = info->mallocs; 1772 1773 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1774 1775 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1776 isend[3] += info->memory; isend[4] += info->mallocs; 1777 if (flag == MAT_LOCAL) { 1778 info->nz_used = isend[0]; 1779 info->nz_allocated = isend[1]; 1780 info->nz_unneeded = isend[2]; 1781 info->memory = isend[3]; 1782 info->mallocs = isend[4]; 1783 } else if (flag == MAT_GLOBAL_MAX) { 1784 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1785 1786 info->nz_used = irecv[0]; 1787 info->nz_allocated = irecv[1]; 1788 info->nz_unneeded = irecv[2]; 1789 info->memory = irecv[3]; 1790 info->mallocs = irecv[4]; 1791 } else if (flag == MAT_GLOBAL_SUM) { 1792 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1793 1794 info->nz_used = irecv[0]; 1795 info->nz_allocated = irecv[1]; 1796 info->nz_unneeded = irecv[2]; 1797 info->memory = irecv[3]; 1798 info->mallocs = irecv[4]; 1799 } 1800 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1801 info->fill_ratio_needed = 0; 1802 info->factor_mallocs = 0; 1803 PetscFunctionReturn(0); 1804 } 1805 1806 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1807 { 1808 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1809 PetscErrorCode ierr; 1810 1811 PetscFunctionBegin; 1812 switch (op) { 1813 case MAT_NEW_NONZERO_LOCATIONS: 1814 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1815 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1816 case MAT_KEEP_NONZERO_PATTERN: 1817 case MAT_NEW_NONZERO_LOCATION_ERR: 1818 case MAT_USE_INODES: 1819 case MAT_IGNORE_ZERO_ENTRIES: 1820 MatCheckPreallocated(A,1); 1821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1822 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1823 break; 1824 case MAT_ROW_ORIENTED: 1825 MatCheckPreallocated(A,1); 1826 a->roworiented = flg; 1827 1828 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1829 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1830 break; 1831 case MAT_FORCE_DIAGONAL_ENTRIES: 1832 case MAT_SORTED_FULL: 1833 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1834 break; 1835 case MAT_IGNORE_OFF_PROC_ENTRIES: 1836 a->donotstash = flg; 1837 break; 1838 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1839 case MAT_SPD: 1840 case MAT_SYMMETRIC: 1841 case MAT_STRUCTURALLY_SYMMETRIC: 1842 case MAT_HERMITIAN: 1843 case MAT_SYMMETRY_ETERNAL: 1844 break; 1845 case MAT_SUBMAT_SINGLEIS: 1846 A->submat_singleis = flg; 1847 break; 1848 case MAT_STRUCTURE_ONLY: 1849 /* The option is handled directly by MatSetOption() */ 1850 break; 1851 default: 1852 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1853 } 1854 PetscFunctionReturn(0); 1855 } 1856 1857 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1858 { 1859 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1860 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1861 PetscErrorCode ierr; 1862 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1863 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1864 PetscInt *cmap,*idx_p; 1865 1866 PetscFunctionBegin; 1867 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1868 mat->getrowactive = PETSC_TRUE; 1869 1870 if 
(!mat->rowvalues && (idx || v)) { 1871 /* 1872 allocate enough space to hold information from the longest row. 1873 */ 1874 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1875 PetscInt max = 1,tmp; 1876 for (i=0; i<matin->rmap->n; i++) { 1877 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1878 if (max < tmp) max = tmp; 1879 } 1880 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1881 } 1882 1883 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1884 lrow = row - rstart; 1885 1886 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1887 if (!v) {pvA = NULL; pvB = NULL;} 1888 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1889 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1890 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1891 nztot = nzA + nzB; 1892 1893 cmap = mat->garray; 1894 if (v || idx) { 1895 if (nztot) { 1896 /* Sort by increasing column numbers, assuming A and B already sorted */ 1897 PetscInt imark = -1; 1898 if (v) { 1899 *v = v_p = mat->rowvalues; 1900 for (i=0; i<nzB; i++) { 1901 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1902 else break; 1903 } 1904 imark = i; 1905 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1906 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1907 } 1908 if (idx) { 1909 *idx = idx_p = mat->rowindices; 1910 if (imark > -1) { 1911 for (i=0; i<imark; i++) { 1912 idx_p[i] = cmap[cworkB[i]]; 1913 } 1914 } else { 1915 for (i=0; i<nzB; i++) { 1916 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1917 else break; 1918 } 1919 imark = i; 1920 } 1921 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1922 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1923 } 1924 } else { 1925 if (idx) *idx = NULL; 1926 if (v) *v = NULL; 1927 } 1928 } 1929 *nz = nztot; 1930 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1931 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1936 { 1937 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1938 1939 PetscFunctionBegin; 1940 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1941 aij->getrowactive = PETSC_FALSE; 1942 PetscFunctionReturn(0); 1943 } 1944 1945 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1946 { 1947 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1948 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1949 PetscErrorCode ierr; 1950 PetscInt i,j,cstart = mat->cmap->rstart; 1951 PetscReal sum = 0.0; 1952 MatScalar *v; 1953 1954 PetscFunctionBegin; 1955 if (aij->size == 1) { 1956 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1957 } else { 1958 if (type == NORM_FROBENIUS) { 1959 v = amat->a; 1960 for (i=0; i<amat->nz; i++) { 1961 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1962 } 1963 v = bmat->a; 1964 for (i=0; i<bmat->nz; i++) { 1965 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1966 } 1967 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1968 *norm = PetscSqrtReal(*norm); 1969 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1970 } else if (type == NORM_1) { /* max column norm */ 1971 PetscReal *tmp,*tmp2; 1972 PetscInt *jj,*garray = aij->garray; 1973 ierr 
= PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1974 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1975 *norm = 0.0; 1976 v = amat->a; jj = amat->j; 1977 for (j=0; j<amat->nz; j++) { 1978 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1979 } 1980 v = bmat->a; jj = bmat->j; 1981 for (j=0; j<bmat->nz; j++) { 1982 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1983 } 1984 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1985 for (j=0; j<mat->cmap->N; j++) { 1986 if (tmp2[j] > *norm) *norm = tmp2[j]; 1987 } 1988 ierr = PetscFree(tmp);CHKERRQ(ierr); 1989 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1990 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1991 } else if (type == NORM_INFINITY) { /* max row norm */ 1992 PetscReal ntemp = 0.0; 1993 for (j=0; j<aij->A->rmap->n; j++) { 1994 v = amat->a + amat->i[j]; 1995 sum = 0.0; 1996 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1997 sum += PetscAbsScalar(*v); v++; 1998 } 1999 v = bmat->a + bmat->i[j]; 2000 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2001 sum += PetscAbsScalar(*v); v++; 2002 } 2003 if (sum > ntemp) ntemp = sum; 2004 } 2005 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2006 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2007 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2013 { 2014 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2015 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2016 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2017 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2018 PetscErrorCode ierr; 2019 Mat B,A_diag,*B_diag; 2020 const MatScalar *array; 2021 2022 PetscFunctionBegin; 2023 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2024 ai = Aloc->i; aj = Aloc->j; 2025 bi = Bloc->i; bj = Bloc->j; 2026 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2027 PetscInt *d_nnz,*g_nnz,*o_nnz; 2028 PetscSFNode *oloc; 2029 PETSC_UNUSED PetscSF sf; 2030 2031 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2032 /* compute d_nnz for preallocation */ 2033 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2034 for (i=0; i<ai[ma]; i++) { 2035 d_nnz[aj[i]]++; 2036 } 2037 /* compute local off-diagonal contributions */ 2038 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2039 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2040 /* map those to global */ 2041 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2042 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2043 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2044 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2045 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2046 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2047 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2048 2049 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2050 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2051 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2052 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2053 ierr = 
MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2054 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2055 } else { 2056 B = *matout; 2057 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2058 } 2059 2060 b = (Mat_MPIAIJ*)B->data; 2061 A_diag = a->A; 2062 B_diag = &b->A; 2063 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2064 A_diag_ncol = A_diag->cmap->N; 2065 B_diag_ilen = sub_B_diag->ilen; 2066 B_diag_i = sub_B_diag->i; 2067 2068 /* Set ilen for diagonal of B */ 2069 for (i=0; i<A_diag_ncol; i++) { 2070 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2071 } 2072 2073 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2074 very quickly (=without using MatSetValues), because all writes are local. */ 2075 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2076 2077 /* copy over the B part */ 2078 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2079 array = Bloc->a; 2080 row = A->rmap->rstart; 2081 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2082 cols_tmp = cols; 2083 for (i=0; i<mb; i++) { 2084 ncol = bi[i+1]-bi[i]; 2085 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2086 row++; 2087 array += ncol; cols_tmp += ncol; 2088 } 2089 ierr = PetscFree(cols);CHKERRQ(ierr); 2090 2091 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2092 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2093 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2094 *matout = B; 2095 } else { 2096 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2097 } 2098 PetscFunctionReturn(0); 2099 } 2100 2101 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2102 { 2103 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2104 Mat a = aij->A,b = aij->B; 2105 PetscErrorCode ierr; 2106 PetscInt s1,s2,s3; 2107 2108 PetscFunctionBegin; 2109 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2110 if (rr) { 2111 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2112 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2113 /* Overlap communication with computation. 
*/ 2114 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2115 } 2116 if (ll) { 2117 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2118 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2119 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2120 } 2121 /* scale the diagonal block */ 2122 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2123 2124 if (rr) { 2125 /* Do a scatter end and then right scale the off-diagonal block */ 2126 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2127 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2128 } 2129 PetscFunctionReturn(0); 2130 } 2131 2132 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2133 { 2134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2135 PetscErrorCode ierr; 2136 2137 PetscFunctionBegin; 2138 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2143 { 2144 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2145 Mat a,b,c,d; 2146 PetscBool flg; 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 a = matA->A; b = matA->B; 2151 c = matB->A; d = matB->B; 2152 2153 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2154 if (flg) { 2155 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2156 } 2157 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2162 { 2163 PetscErrorCode ierr; 2164 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2165 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2166 2167 PetscFunctionBegin; 2168 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2169 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2170 /* because of the column compression in the off-processor part of the matrix a->B, 2171 the number of columns in a->B and b->B may be different, hence we cannot call 2172 the MatCopy() directly on the two parts. If need be, we can provide a more 2173 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2174 then copying the submatrices */ 2175 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2176 } else { 2177 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2178 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2179 } 2180 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2181 PetscFunctionReturn(0); 2182 } 2183 2184 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2185 { 2186 PetscErrorCode ierr; 2187 2188 PetscFunctionBegin; 2189 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2190 PetscFunctionReturn(0); 2191 } 2192 2193 /* 2194 Computes the number of nonzeros per row needed for preallocation when X and Y 2195 have different nonzero structure. 
2196 */ 2197 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2198 { 2199 PetscInt i,j,k,nzx,nzy; 2200 2201 PetscFunctionBegin; 2202 /* Set the number of nonzeros in the new matrix */ 2203 for (i=0; i<m; i++) { 2204 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2205 nzx = xi[i+1] - xi[i]; 2206 nzy = yi[i+1] - yi[i]; 2207 nnz[i] = 0; 2208 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2209 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2210 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2211 nnz[i]++; 2212 } 2213 for (; k<nzy; k++) nnz[i]++; 2214 } 2215 PetscFunctionReturn(0); 2216 } 2217 2218 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2219 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2220 { 2221 PetscErrorCode ierr; 2222 PetscInt m = Y->rmap->N; 2223 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2224 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2225 2226 PetscFunctionBegin; 2227 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2228 PetscFunctionReturn(0); 2229 } 2230 2231 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2232 { 2233 PetscErrorCode ierr; 2234 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2235 2236 PetscFunctionBegin; 2237 if (str == SAME_NONZERO_PATTERN) { 2238 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2239 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2240 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2241 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2242 } else { 2243 Mat B; 2244 PetscInt *nnz_d,*nnz_o; 2245 2246 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2247 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2248 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2249 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2250 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2251 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2253 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2254 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2255 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2256 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2258 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2264 2265 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2266 { 2267 #if defined(PETSC_USE_COMPLEX) 2268 PetscErrorCode ierr; 2269 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2270 2271 PetscFunctionBegin; 2272 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2273 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2274 #else 2275 PetscFunctionBegin; 2276 #endif 2277 PetscFunctionReturn(0); 2278 } 2279 2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2281 { 2282 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2283 PetscErrorCode ierr; 2284 2285 PetscFunctionBegin; 2286 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2287 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2288 
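  /* Taking the real part of both local blocks (diagonal A and off-diagonal B) leaves the whole
     parallel matrix real; MatImaginaryPart_MPIAIJ below is the analogous operation. */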
PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*idxb = NULL,m = A->rmap->n;
  PetscScalar       *va,*vv;
  Vec               vB,vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
  ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
        idx[i] = a->garray[idxb[i]];
    }
  }
  ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr); /* vv was obtained from v above, so it must be restored against v, not vA */
  ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vA);CHKERRQ(ierr);
  ierr = VecDestroy(&vB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       m = A->rmap->n,n = A->cmap->n;
  PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA, *ba;
  PetscInt       r,j,col,ncols,*bi,*bj;
  PetscErrorCode ierr;
  Mat            B = mat->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds the entire A and the other processes have no entries */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ba   = b->a;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so we already KNOW the row minimum in absolute value is 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       m = A->rmap->n,n = A->cmap->n;
  PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA, *ba;
  PetscInt       r,j,col,ncols,*bi,*bj;
  PetscErrorCode ierr;
  Mat            B = mat->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds the entire A and the other processes have no entries */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ba   = b->a;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so we already KNOW the row minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       m = A->rmap->n,n = A->cmap->n;
  PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA, *ba;
  PetscInt       r,j,col,ncols,*bi,*bj;
  PetscErrorCode ierr;
  Mat            B = mat->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds the entire A and the other processes have no entries */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      ierr =
VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2585 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2586 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2587 } 2588 PetscFunctionReturn(0); 2589 } 2590 2591 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2592 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2593 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2594 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2595 2596 /* Get offdiagIdx[] for implicit 0.0 */ 2597 ba = b->a; 2598 bi = b->i; 2599 bj = b->j; 2600 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2601 for (r = 0; r < m; r++) { 2602 ncols = bi[r+1] - bi[r]; 2603 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2604 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2605 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2606 offdiagA[r] = 0.0; 2607 2608 /* Find first hole in the cmap */ 2609 for (j=0; j<ncols; j++) { 2610 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2611 if (col > j && j < cstart) { 2612 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2613 break; 2614 } else if (col > j + n && j >= cstart) { 2615 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2616 break; 2617 } 2618 } 2619 if (j == ncols && ncols < A->cmap->N - n) { 2620 /* a hole is outside compressed Bcols */ 2621 if (ncols == 0) { 2622 if (cstart) { 2623 offdiagIdx[r] = 0; 2624 } else offdiagIdx[r] = cend; 2625 } else { /* ncols > 0 */ 2626 offdiagIdx[r] = cmap[ncols-1] + 1; 2627 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2628 } 2629 } 2630 } 2631 2632 for (j=0; j<ncols; j++) { 2633 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2634 ba++; bj++; 2635 } 2636 } 2637 2638 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2639 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2640 for (r = 0; r < m; ++r) { 2641 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2642 a[r] = diagA[r]; 2643 if (idx) idx[r] = cstart + diagIdx[r]; 2644 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2645 a[r] = diagA[r]; 2646 if (idx) { 2647 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2648 idx[r] = cstart + diagIdx[r]; 2649 } else idx[r] = offdiagIdx[r]; 2650 } 2651 } else { 2652 a[r] = offdiagA[r]; 2653 if (idx) idx[r] = offdiagIdx[r]; 2654 } 2655 } 2656 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2657 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2658 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2659 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2660 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2661 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2662 PetscFunctionReturn(0); 2663 } 2664 2665 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2666 { 2667 PetscErrorCode ierr; 2668 Mat *dummy; 2669 2670 PetscFunctionBegin; 2671 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2672 *newmat = *dummy; 2673 ierr = PetscFree(dummy);CHKERRQ(ierr); 2674 PetscFunctionReturn(0); 2675 } 2676 2677 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2678 { 2679 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2680 PetscErrorCode ierr; 2681 2682 PetscFunctionBegin; 2683 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2684 A->factorerrortype = a->A->factorerrortype; 2685 
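  /* The inversion happens entirely in the local diagonal block, so any factorization error
     (e.g. a zero pivot) is propagated to the parallel matrix for callers to query.
     A minimal caller-side sketch (illustrative only):

       const PetscScalar *diag;
       ierr = MatInvertBlockDiagonal(A,&diag);CHKERRQ(ierr);
       // diag now holds the inverted diagonal blocks; the array is owned by A, do not free it
  */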
PetscFunctionReturn(0); 2686 } 2687 2688 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2689 { 2690 PetscErrorCode ierr; 2691 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2692 2693 PetscFunctionBegin; 2694 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2695 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2696 if (x->assembled) { 2697 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2698 } else { 2699 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2700 } 2701 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2702 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2703 PetscFunctionReturn(0); 2704 } 2705 2706 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2707 { 2708 PetscFunctionBegin; 2709 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2710 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2711 PetscFunctionReturn(0); 2712 } 2713 2714 /*@ 2715 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2716 2717 Collective on Mat 2718 2719 Input Parameters: 2720 + A - the matrix 2721 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2722 2723 Level: advanced 2724 2725 @*/ 2726 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2727 { 2728 PetscErrorCode ierr; 2729 2730 PetscFunctionBegin; 2731 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2732 PetscFunctionReturn(0); 2733 } 2734 2735 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2736 { 2737 PetscErrorCode ierr; 2738 PetscBool sc = PETSC_FALSE,flg; 2739 2740 PetscFunctionBegin; 2741 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2742 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2743 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2744 if (flg) { 2745 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2746 } 2747 ierr = PetscOptionsTail();CHKERRQ(ierr); 2748 PetscFunctionReturn(0); 2749 } 2750 2751 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2752 { 2753 PetscErrorCode ierr; 2754 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2755 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2756 2757 PetscFunctionBegin; 2758 if (!Y->preallocated) { 2759 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2760 } else if (!aij->nz) { 2761 PetscInt nonew = aij->nonew; 2762 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2763 aij->nonew = nonew; 2764 } 2765 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2766 PetscFunctionReturn(0); 2767 } 2768 2769 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2770 { 2771 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2772 PetscErrorCode ierr; 2773 2774 PetscFunctionBegin; 2775 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2776 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2777 if (d) { 2778 PetscInt rstart; 2779 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2780 *d += rstart; 2781 2782 } 2783 
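  /* The sequential check ran on the diagonal block, which is indexed locally; adding rstart
     above converts the row of the first missing diagonal entry to global numbering. */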
PetscFunctionReturn(0); 2784 } 2785 2786 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2787 { 2788 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2789 PetscErrorCode ierr; 2790 2791 PetscFunctionBegin; 2792 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2793 PetscFunctionReturn(0); 2794 } 2795 2796 /* -------------------------------------------------------------------*/ 2797 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2798 MatGetRow_MPIAIJ, 2799 MatRestoreRow_MPIAIJ, 2800 MatMult_MPIAIJ, 2801 /* 4*/ MatMultAdd_MPIAIJ, 2802 MatMultTranspose_MPIAIJ, 2803 MatMultTransposeAdd_MPIAIJ, 2804 NULL, 2805 NULL, 2806 NULL, 2807 /*10*/ NULL, 2808 NULL, 2809 NULL, 2810 MatSOR_MPIAIJ, 2811 MatTranspose_MPIAIJ, 2812 /*15*/ MatGetInfo_MPIAIJ, 2813 MatEqual_MPIAIJ, 2814 MatGetDiagonal_MPIAIJ, 2815 MatDiagonalScale_MPIAIJ, 2816 MatNorm_MPIAIJ, 2817 /*20*/ MatAssemblyBegin_MPIAIJ, 2818 MatAssemblyEnd_MPIAIJ, 2819 MatSetOption_MPIAIJ, 2820 MatZeroEntries_MPIAIJ, 2821 /*24*/ MatZeroRows_MPIAIJ, 2822 NULL, 2823 NULL, 2824 NULL, 2825 NULL, 2826 /*29*/ MatSetUp_MPIAIJ, 2827 NULL, 2828 NULL, 2829 MatGetDiagonalBlock_MPIAIJ, 2830 NULL, 2831 /*34*/ MatDuplicate_MPIAIJ, 2832 NULL, 2833 NULL, 2834 NULL, 2835 NULL, 2836 /*39*/ MatAXPY_MPIAIJ, 2837 MatCreateSubMatrices_MPIAIJ, 2838 MatIncreaseOverlap_MPIAIJ, 2839 MatGetValues_MPIAIJ, 2840 MatCopy_MPIAIJ, 2841 /*44*/ MatGetRowMax_MPIAIJ, 2842 MatScale_MPIAIJ, 2843 MatShift_MPIAIJ, 2844 MatDiagonalSet_MPIAIJ, 2845 MatZeroRowsColumns_MPIAIJ, 2846 /*49*/ MatSetRandom_MPIAIJ, 2847 NULL, 2848 NULL, 2849 NULL, 2850 NULL, 2851 /*54*/ MatFDColoringCreate_MPIXAIJ, 2852 NULL, 2853 MatSetUnfactored_MPIAIJ, 2854 MatPermute_MPIAIJ, 2855 NULL, 2856 /*59*/ MatCreateSubMatrix_MPIAIJ, 2857 MatDestroy_MPIAIJ, 2858 MatView_MPIAIJ, 2859 NULL, 2860 NULL, 2861 /*64*/ NULL, 2862 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2863 NULL, 2864 NULL, 2865 NULL, 2866 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2867 MatGetRowMinAbs_MPIAIJ, 2868 NULL, 2869 NULL, 2870 NULL, 2871 NULL, 2872 /*75*/ MatFDColoringApply_AIJ, 2873 MatSetFromOptions_MPIAIJ, 2874 NULL, 2875 NULL, 2876 MatFindZeroDiagonals_MPIAIJ, 2877 /*80*/ NULL, 2878 NULL, 2879 NULL, 2880 /*83*/ MatLoad_MPIAIJ, 2881 MatIsSymmetric_MPIAIJ, 2882 NULL, 2883 NULL, 2884 NULL, 2885 NULL, 2886 /*89*/ NULL, 2887 NULL, 2888 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2889 NULL, 2890 NULL, 2891 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2892 NULL, 2893 NULL, 2894 NULL, 2895 MatBindToCPU_MPIAIJ, 2896 /*99*/ MatProductSetFromOptions_MPIAIJ, 2897 NULL, 2898 NULL, 2899 MatConjugate_MPIAIJ, 2900 NULL, 2901 /*104*/MatSetValuesRow_MPIAIJ, 2902 MatRealPart_MPIAIJ, 2903 MatImaginaryPart_MPIAIJ, 2904 NULL, 2905 NULL, 2906 /*109*/NULL, 2907 NULL, 2908 MatGetRowMin_MPIAIJ, 2909 NULL, 2910 MatMissingDiagonal_MPIAIJ, 2911 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2912 NULL, 2913 MatGetGhosts_MPIAIJ, 2914 NULL, 2915 NULL, 2916 /*119*/MatMultDiagonalBlock_MPIAIJ, 2917 NULL, 2918 NULL, 2919 NULL, 2920 MatGetMultiProcBlock_MPIAIJ, 2921 /*124*/MatFindNonzeroRows_MPIAIJ, 2922 MatGetColumnNorms_MPIAIJ, 2923 MatInvertBlockDiagonal_MPIAIJ, 2924 MatInvertVariableBlockDiagonal_MPIAIJ, 2925 MatCreateSubMatricesMPI_MPIAIJ, 2926 /*129*/NULL, 2927 NULL, 2928 NULL, 2929 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2930 NULL, 2931 /*134*/NULL, 2932 NULL, 2933 NULL, 2934 NULL, 2935 NULL, 2936 /*139*/MatSetBlockSizes_MPIAIJ, 2937 NULL, 2938 NULL, 2939 MatFDColoringSetUp_MPIXAIJ, 2940 
MatFindOffBlockDiagonalEntries_MPIAIJ, 2941 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2942 /*145*/NULL, 2943 NULL, 2944 NULL 2945 }; 2946 2947 /* ----------------------------------------------------------------------------------------*/ 2948 2949 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2950 { 2951 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2952 PetscErrorCode ierr; 2953 2954 PetscFunctionBegin; 2955 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2956 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2957 PetscFunctionReturn(0); 2958 } 2959 2960 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2961 { 2962 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2963 PetscErrorCode ierr; 2964 2965 PetscFunctionBegin; 2966 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2967 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2968 PetscFunctionReturn(0); 2969 } 2970 2971 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2972 { 2973 Mat_MPIAIJ *b; 2974 PetscErrorCode ierr; 2975 PetscMPIInt size; 2976 2977 PetscFunctionBegin; 2978 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2979 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2980 b = (Mat_MPIAIJ*)B->data; 2981 2982 #if defined(PETSC_USE_CTABLE) 2983 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2984 #else 2985 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2986 #endif 2987 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2988 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2989 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2990 2991 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2992 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2993 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2994 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2995 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2996 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2997 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2998 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2999 3000 if (!B->preallocated) { 3001 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3002 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3003 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3004 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3005 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3006 } 3007 3008 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3009 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3010 B->preallocated = PETSC_TRUE; 3011 B->was_assembled = PETSC_FALSE; 3012 B->assembled = PETSC_FALSE; 3013 PetscFunctionReturn(0); 3014 } 3015 3016 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 3017 { 3018 Mat_MPIAIJ *b; 3019 PetscErrorCode ierr; 3020 3021 PetscFunctionBegin; 3022 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3023 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3024 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3025 b = (Mat_MPIAIJ*)B->data; 3026 3027 #if defined(PETSC_USE_CTABLE) 3028 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 3029 #else 3030 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 3031 #endif 3032 ierr = PetscFree(b->garray);CHKERRQ(ierr); 3033 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 3034 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 3035 3036 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 3037 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 3038 B->preallocated = PETSC_TRUE; 3039 B->was_assembled = PETSC_FALSE; 3040 B->assembled = PETSC_FALSE; 3041 PetscFunctionReturn(0); 3042 } 3043 3044 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3045 { 3046 Mat mat; 3047 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3048 PetscErrorCode ierr; 3049 3050 PetscFunctionBegin; 3051 *newmat = NULL; 3052 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3053 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3054 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3055 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3056 a = (Mat_MPIAIJ*)mat->data; 3057 3058 mat->factortype = matin->factortype; 3059 mat->assembled = matin->assembled; 3060 mat->insertmode = NOT_SET_VALUES; 3061 mat->preallocated = matin->preallocated; 3062 3063 a->size = oldmat->size; 3064 a->rank = oldmat->rank; 3065 a->donotstash = oldmat->donotstash; 3066 a->roworiented = oldmat->roworiented; 3067 a->rowindices = NULL; 3068 a->rowvalues = NULL; 3069 a->getrowactive = PETSC_FALSE; 3070 3071 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3072 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3073 3074 if (oldmat->colmap) { 3075 #if defined(PETSC_USE_CTABLE) 3076 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3077 #else 3078 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 3079 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3080 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 3081 #endif 3082 } else a->colmap = NULL; 3083 if (oldmat->garray) { 3084 PetscInt len; 3085 len = oldmat->B->cmap->n; 3086 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 3087 
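  /* garray maps the compressed local column indices of the off-diagonal block B to global
     column numbers; the duplicate gets its own copy so the two matrices remain independent. */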
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3088 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 3089 } else a->garray = NULL; 3090 3091 /* It may happen MatDuplicate is called with a non-assembled matrix 3092 In fact, MatDuplicate only requires the matrix to be preallocated 3093 This may happen inside a DMCreateMatrix_Shell */ 3094 if (oldmat->lvec) { 3095 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3096 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3097 } 3098 if (oldmat->Mvctx) { 3099 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3100 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3101 } 3102 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3103 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3104 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3105 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3106 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3107 *newmat = mat; 3108 PetscFunctionReturn(0); 3109 } 3110 3111 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3112 { 3113 PetscBool isbinary, ishdf5; 3114 PetscErrorCode ierr; 3115 3116 PetscFunctionBegin; 3117 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3118 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3119 /* force binary viewer to load .info file if it has not yet done so */ 3120 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3121 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3122 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3123 if (isbinary) { 3124 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3125 } else if (ishdf5) { 3126 #if defined(PETSC_HAVE_HDF5) 3127 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3128 #else 3129 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3130 #endif 3131 } else { 3132 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3133 } 3134 PetscFunctionReturn(0); 3135 } 3136 3137 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3138 { 3139 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3140 PetscInt *rowidxs,*colidxs; 3141 PetscScalar *matvals; 3142 PetscErrorCode ierr; 3143 3144 PetscFunctionBegin; 3145 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3146 3147 /* read in matrix header */ 3148 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3149 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3150 M = header[1]; N = header[2]; nz = header[3]; 3151 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3152 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3153 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3154 3155 /* set block sizes from the viewer's .info file */ 3156 ierr = 
MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3157 /* set global sizes if not set already */ 3158 if (mat->rmap->N < 0) mat->rmap->N = M; 3159 if (mat->cmap->N < 0) mat->cmap->N = N; 3160 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3161 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3162 3163 /* check if the matrix sizes are correct */ 3164 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3165 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3166 3167 /* read in row lengths and build row indices */ 3168 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3169 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3170 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3171 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3172 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3173 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3174 /* read in column indices and matrix values */ 3175 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3176 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3177 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3178 /* store matrix indices and values */ 3179 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3180 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3181 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3182 PetscFunctionReturn(0); 3183 } 3184 3185 /* Not scalable because of ISAllGather() unless getting all columns. 
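   ISAllGather() concatenates the locally owned indices of iscol onto every process,
   so each process ends up holding the entire global index set; per-process memory
   therefore grows with the global number of selected columns.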
*/
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns */
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local
  of global size (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
    mat - matrix
    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
            i.e., mat->rstart <= isrow[i] < mat->rend
    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
            i.e., mat->cstart <= iscol[i] < mat->cend
  Output Parameters:
    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
    iscol_o - sequential column index set for retrieving mat->B
    garray - column map; garray[i] indicates the global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and garray */
  /* off-process column indices */
  count = 0;
  ierr  = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr  = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has the same block size as iscol!
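     (iscol_o keeps only the off-diagonal columns actually present on this process,
      so a block of iscol may be split between the diagonal and off-diagonal parts)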
  */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* isrow and iscol have the same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
    if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
    if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
    if (n) {
      ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and garray (replace garray with a plain array?) */
    ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);

    /* Create local submatrices Asub and Bsub */
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);

    /* Create submatrix M */
    ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve the condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
    n    = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);

      ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
      j    = 0;
      ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
      }
      ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);

      ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);

    } else if (BsubN < n) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than columns of B %D",BsubN,asub->B->cmap->N);
    }

    ierr = PetscFree(garray);CHKERRQ(ierr);
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in this process for the next request */
    ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has the same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with the global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has the same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has the same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
    ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have the same processor distribution as mat */
      ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has the same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
        ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
        ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
        if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);

        ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
        if (iscol_sub) {
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has the global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
    }
  }

  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
   and "off-diagonal" parts of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global indices of the columns of B

   Output Parameter:
.  mat - the matrix, with input A as its local diagonal matrix

   Level: advanced

   Notes:
   See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
   A becomes part of the output mat, and B is destroyed by this routine. The user cannot use A and B anymore.

.seealso: MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;
  Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
  PetscScalar    *oa=b->a;
  Mat            Bnew;
  PetscInt       m,n,N;

  PetscFunctionBegin;
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
  if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
  /* remove check below; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
  /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);

  ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* Set A as the diagonal portion of *mat */
  maij->A = A;

  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as the off-diagonal portion of *mat */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);

  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  ierr = MatDestroy(&B);CHKERRQ(ierr);

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
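/*
  A minimal sketch (mirroring MatCreateSubMatrix_MPIAIJ_SameRowColDist() above) of how
  MatCreateMPIAIJWithSeqAIJ() is driven: extract the local diagonal and off-diagonal pieces
  of an MPIAIJ matrix and hand them, together with garray, to the constructor, which takes
  ownership of both sequential matrices.

    Mat Asub,Bsub,M;
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
    ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

  Afterwards Asub is M's diagonal block and Bsub has been consumed; neither may be used again.
*/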
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for the special case where each processor gets the entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires
         iscol_local to be sorted; it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr  = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr  = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr  = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj   = aij->j;
  ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in this process for the next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
  Not great since it makes two copies of the submatrix: first a SeqAIJ on each process,
  and then the end result by concatenating the local matrices. Writing it directly
  would be much like MatCreateSubMatrices_MPIAIJ().

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* Check for the special case where each processor gets the entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in this process for the next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

  if (PetscDefined(USE_DEBUG)) {
    for (i=0; i<m; i++) {
      nnz = Ii[i+1] - Ii[i];
      JJ  = J + Ii[i];
      if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
      if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
      if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
    }
  }

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i],v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
  }
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
   The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of v[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

   The format used for the sparse matrix input is equivalent to a row-major ordering; i.e., for
   the following matrix, the expected input data is as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix [m x (N-n)] constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2; i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain, in standard
   CSR format, the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

   The format used for the sparse matrix input is equivalent to a row-major ordering;
   i.e., for the following matrix, the input data expected is as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays()

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain, in standard
   CSR format, the local rows. Only the numerical values are updated; the other arrays must be identical

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.  J - column indices
-  v - matrix values

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscErrorCode ierr;
  PetscInt       cstart,nnz,i,j;
  PetscInt       *ld;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data;
  PetscScalar    *ad  = Ad->a, *ao = Ao->a;
  const PetscInt *Adi = Ad->i;
  PetscInt       ldi,Iii,md;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  cstart = mat->cmap->rstart;
  if (!Aij->ld) {
    /* count number of entries below block diagonal */
    ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
    Aij->ld = ld;
    for (i=0; i<m; i++) {
      nnz = Ii[i+1] - Ii[i];
      j   = 0;
      while (j < nnz && J[j] < cstart) j++; /* the bounds check must come before reading J[j] */
      J  += nnz;
      ld[i] = j;
    }
  } else {
    ld = Aij->ld;
  }

  for (i=0; i<m; i++) {
    nnz  = Ii[i+1] - Ii[i];
    Iii  = Ii[i];
    ldi  = ld[i];
    md   = Adi[i+1]-Adi[i];
    ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
    ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
    ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
    ad  += md;
    ao  += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm'; i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n of the
   given processor; i.e., the diagonal matrix on process 0 is [m0 x n0],
   the diagonal matrix on process 1 is [m1 x n1], etc. The remaining
   portion of the local submatrix [m x (N-n)] constitutes the
   OFF-DIAGONAL portion. The example below better illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4449 type of communicator, use the construction mechanism 4450 .vb 4451 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4452 .ve 4453 4454 $ MatCreate(...,&A); 4455 $ MatSetType(A,MATMPIAIJ); 4456 $ MatSetSizes(A, m,n,M,N); 4457 $ MatMPIAIJSetPreallocation(A,...); 4458 4459 By default, this format uses inodes (identical nodes) when possible. 4460 We search for consecutive rows with the same nonzero structure, thereby 4461 reusing matrix information to achieve increased efficiency. 4462 4463 Options Database Keys: 4464 + -mat_no_inode - Do not use inodes 4465 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4466 4467 4468 4469 Example usage: 4470 4471 Consider the following 8x8 matrix with 34 non-zero values, that is 4472 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4473 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4474 as follows 4475 4476 .vb 4477 1 2 0 | 0 3 0 | 0 4 4478 Proc0 0 5 6 | 7 0 0 | 8 0 4479 9 0 10 | 11 0 0 | 12 0 4480 ------------------------------------- 4481 13 0 14 | 15 16 17 | 0 0 4482 Proc1 0 18 0 | 19 20 21 | 0 0 4483 0 0 0 | 22 23 0 | 24 0 4484 ------------------------------------- 4485 Proc2 25 26 27 | 0 0 28 | 29 0 4486 30 0 0 | 31 32 33 | 0 34 4487 .ve 4488 4489 This can be represented as a collection of submatrices as 4490 4491 .vb 4492 A B C 4493 D E F 4494 G H I 4495 .ve 4496 4497 Where the submatrices A,B,C are owned by proc0, D,E,F are 4498 owned by proc1, G,H,I are owned by proc2. 4499 4500 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4501 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4502 The 'M','N' parameters are 8,8, and have the same values on all procs. 4503 4504 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4505 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4506 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4507 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4508 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4509 matrix, ans [DF] as another SeqAIJ matrix. 4510 4511 When d_nz, o_nz parameters are specified, d_nz storage elements are 4512 allocated for every row of the local diagonal submatrix, and o_nz 4513 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4514 One way to choose d_nz and o_nz is to use the max nonzerors per local 4515 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4516 In this case, the values of d_nz,o_nz are 4517 .vb 4518 proc0 : dnz = 2, o_nz = 2 4519 proc1 : dnz = 3, o_nz = 2 4520 proc2 : dnz = 1, o_nz = 4 4521 .ve 4522 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4523 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4524 for proc3. i.e we are using 12+15+10=37 storage locations to store 4525 34 values. 4526 4527 When d_nnz, o_nnz parameters are specified, the storage is specified 4528 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4529 In the above case the values for d_nnz,o_nnz are 4530 .vb 4531 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4532 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4533 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4534 .ve 4535 Here the space allocated is sum of all the above values i.e 34, and 4536 hence pre-allocation is perfect. 
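
   As a minimal sketch (variable names illustrative, error checking omitted), proc0 of
   the example above could therefore create its share of the matrix with
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2}; /* the proc0 values listed above */
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);  /* d_nz/o_nz are ignored since *_nnz is given */
.ve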

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

  Not collective

  Input Parameter:
. A - The MPIAIJ matrix

  Output Parameters:
+ Ad - The local diagonal block as a SeqAIJ matrix
. Ao - The local off-diagonal block as a SeqAIJ matrix
- colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

  Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
  local column numbers to global column numbers in the original matrix.
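
  A minimal usage sketch (error checking omitted):
.vb
    Mat            Ad,Ao;
    const PetscInt *colmap;
    MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
    /* local column j of Ao corresponds to global column colmap[j] of A */
.ve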

  Level: intermediate

.seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr    = MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
  ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI*)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
  ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
  ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
  ierr = PetscFree(merge->bi);CHKERRQ(ierr);
  ierr = PetscFree(merge->bj);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
  ierr = PetscFree(merge->coi);CHKERRQ(ierr);
  ierr = PetscFree(merge->coj);CHKERRQ(ierr);
  ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscFree(merge);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N = mpimat->cmap->N,i,j,*owners,*ai = a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa = a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
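
/*
   MatCreateMPIAIJSumSeqAIJNumeric() above and MatCreateMPIAIJSumSeqAIJSymbolic() below
   implement the usual PETSc symbolic/numeric split: the symbolic routine computes the
   parallel nonzero structure once and caches the merge data in a PetscContainer attached
   to the result, after which the numeric routine can be called repeatedly to refill the
   values; MatCreateMPIAIJSumSeqAIJ() combines the two.
*/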
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M = seqmat->rmap->n,N = seqmat->cmap->n,i,*owners,*ai = a->i,*aj = a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
     matrices from each processor

    Collective

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
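
     A minimal usage sketch (names illustrative; each process contributes a sequential
     matrix of the same global size):
.vb
       Mat C;
       MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
       /* ... change the values, but not the nonzero pattern, of seqmat ... */
       MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve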
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
     mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
     with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

   Notes:
     When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
     If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
     This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
     modify the values of the returned A_loc.
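
     A minimal create-then-reuse sketch (error checking omitted):
.vb
       Mat A_loc;
       MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc); /* first call builds A_loc */
       /* ... the values of A change, but not its nonzero pattern ... */
       MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);   /* refresh the values of A_loc */
       MatDestroy(&A_loc);
.ve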

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscMPIInt    size;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    if (scall == MAT_INITIAL_MATRIX) {
      ierr   = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a  = (Mat_SeqAIJ*)(mpimat->A)->data;
  b  = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A, columns left of the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
     mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
-    A_loc - the local sequential matrix generated

    Level: developer

   Notes:
     This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal
     part, then those associated with the off-diagonal part (in its local ordering)
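
     A minimal usage sketch (error checking omitted):
.vb
       Mat A_loc;
       IS  glob;
       MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc);
       /* column j of A_loc corresponds to the global column given by entry j of glob */
.ve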

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
@*/
PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat            Ao,Ad;
  const PetscInt *cmap;
  PetscMPIInt    size;
  PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    if (scall == MAT_INITIAL_MATRIX) {
      ierr   = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
      *A_loc = Ad;
    } else if (scall == MAT_REUSE_MATRIX) {
      ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
    PetscFunctionReturn(0);
  }
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
  } else {
    Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
    Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
    Mat_SeqAIJ        *c;
    PetscInt          *ai = a->i, *aj = a->j;
    PetscInt          *bi = b->i, *bj = b->j;
    PetscInt          *ci,*cj;
    const PetscScalar *aa,*ba;
    PetscScalar       *ca;
    PetscInt          i,j,am,dn,on;

    ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
    ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
    ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
    ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      PetscInt k;
      ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
      ierr  = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
      ierr  = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
      ci[0] = 0;
      for (i=0,k=0; i<am; i++) {
        const PetscInt ncols_o = bi[i+1] - bi[i];
        const PetscInt ncols_d = ai[i+1] - ai[i];
        ci[i+1] = ci[i] + ncols_o + ncols_d;
        /* diagonal portion of A */
        for (j=0; j<ncols_d; j++,k++) {
          cj[k] = *aj++;
          ca[k] = *aa++;
        }
        /* off-diagonal portion of A */
        for (j=0; j<ncols_o; j++,k++) {
          cj[k] = dn + *bj++;
          ca[k] = *ba++;
        }
      }
      /* put together the new matrix */
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
      /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
      /* Since these are PETSc arrays, change flags to free them as necessary. */
      c          = (Mat_SeqAIJ*)(*A_loc)->data;
      c->free_a  = PETSC_TRUE;
      c->free_ij = PETSC_TRUE;
      c->nonew   = 0;
      ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
    } else if (scall == MAT_REUSE_MATRIX) {
#if defined(PETSC_HAVE_DEVICE)
      (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
#endif
      c  = (Mat_SeqAIJ*)(*A_loc)->data;
      ca = c->a;
      for (i=0; i<am; i++) {
        const PetscInt ncols_d = ai[i+1] - ai[i];
        const PetscInt ncols_o = bi[i+1] - bi[i];
        /* diagonal portion of A */
        for (j=0; j<ncols_d; j++) *ca++ = *aa++;
        /* off-diagonal portion of A */
        for (j=0; j<ncols_o; j++) *ca++ = *ba++;
      }
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
    ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
    if (glob) {
      PetscInt cst, *gidx;

      ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
      ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
      for (i=0; i<dn; i++) gidx[i]    = cst + i;
      for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
    }
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated
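
     A minimal usage sketch (error checking omitted; passing NULL for row and col selects
     all local rows and the nonzero columns):
.vb
       Mat A_loc;
       MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve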

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB,&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices: the whole row (all of its columns)
 * is extracted once a row is matched. Rows can be local or remote. The routine is designed
 * to be scalable in memory so that nothing is based on a global size.
 */
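/*
 * Overview of the communication below: the local rows of P are the roots and the requested
 * rows are the leaves of PetscSF star forests, one for the diagonal block and one for the
 * off-diagonal block. A first SF broadcast moves the per-row nonzero counts and offsets;
 * two further SFs then broadcast the column indices and the numerical values directly out
 * of P's internal CSR arrays into P_oth.
 */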
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt            owner;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscErrorCode         ierr;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     */
    owner = 0;
    lidx  = 0;
    ierr  = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create an SF to communicate how many nonzero columns each row has */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their offsets */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0; i<plocalsize; i++) {
    /* diagonal part */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off-diagonal part */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's columns */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i=0; i<nrows; i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol    = PetscMax(pnnz[i],ncol);
    /* diagonal part */
    dntotalcols += nlcols[i*2+0];
    /* off-diagonal part */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*)(*P_oth)->data;
  /* diagonal part */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off-diagonal part */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diagonal part */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off-diagonal part */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0; i<nrows; i++) {
    owner = 0;
    ierr  = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for the diagonal matrix */
    for (j=0; j<nlcols[i*2+0]; j++) {
      iremote[dntotalcols].index = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal part */
    for (j=0; j<nlcols[i*2+1]; j++) {
      oiremote[ontotalcols].index = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as the roots and P_oth as the leaves
   * Diagonal matrix
   */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off-diagonal matrix */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data to save memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for the diagonal matrix */
  for (i=0; i<pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
  /* We want P_oth to store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use a memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0; i<pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
  nout = 0;
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  /* Stop PETSc from shrinking the memory */
  for (i=0; i<nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach the PetscSF objects to P_oth so that we can reuse them later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ     *p_oth;
  Mat_SeqAIJ     *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  IS             rows,map;
  PetscHMapI     hamp;
  PetscInt       i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm       comm;
  PetscSF        sf,osf;
  PetscBool      has;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  /* If it is the first time, create an index set of off-diagonal nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    ierr  = PetscHMapICreate(&hamp);CHKERRQ(ierr);
    ierr  = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
    ierr  = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0; i<a->B->cmap->n; i++) {
      key  = a->garray[i]/dof;
      ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
      if (!has) {
        mapping[i] = count;
        ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
      } else {
        /* The current 'i' maps to the same key as the previous step */
        mapping[i] = count-1;
      }
    }
    ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
    ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
    if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
    ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
    off  = 0;
    ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
    ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
    ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
    /* In case the matrix was already created, but the user wants to recreate it */
    ierr = MatDestroy(P_oth);CHKERRQ(ierr);
    ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
    ierr = ISDestroy(&map);CHKERRQ(ierr);
    ierr = ISDestroy(&rows);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
attached to the matrix ealier. 5683 * */ 5684 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5685 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5686 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5687 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5688 /* Update values in place */ 5689 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5690 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5691 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5692 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5693 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5694 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5695 PetscFunctionReturn(0); 5696 } 5697 5698 /*@C 5699 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5700 5701 Collective on Mat 5702 5703 Input Parameters: 5704 + A,B - the matrices in mpiaij format 5705 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5706 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5707 5708 Output Parameter: 5709 + rowb, colb - index sets of rows and columns of B to extract 5710 - B_seq - the sequential matrix generated 5711 5712 Level: developer 5713 5714 @*/ 5715 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5716 { 5717 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5718 PetscErrorCode ierr; 5719 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5720 IS isrowb,iscolb; 5721 Mat *bseq=NULL; 5722 5723 PetscFunctionBegin; 5724 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5725 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5726 } 5727 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5728 5729 if (scall == MAT_INITIAL_MATRIX) { 5730 start = A->cmap->rstart; 5731 cmap = a->garray; 5732 nzA = a->A->cmap->n; 5733 nzB = a->B->cmap->n; 5734 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5735 ncols = 0; 5736 for (i=0; i<nzB; i++) { /* row < local row index */ 5737 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5738 else break; 5739 } 5740 imark = i; 5741 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5742 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5743 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5744 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5745 } else { 5746 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5747 isrowb = *rowb; iscolb = *colb; 5748 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5749 bseq[0] = *B_seq; 5750 } 5751 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5752 *B_seq = bseq[0]; 5753 ierr = PetscFree(bseq);CHKERRQ(ierr); 5754 if (!rowb) { 5755 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5756 } else { 5757 *rowb = isrowb; 5758 } 5759 if (!colb) { 5760 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5761 } else { 5762 *colb = iscolb; 5763 } 5764 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5765 PetscFunctionReturn(0); 5766 } 5767 5768 /* 5769 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by 
taking rows of B that equal to nonzero columns 5770 of the OFF-DIAGONAL portion of local A 5771 5772 Collective on Mat 5773 5774 Input Parameters: 5775 + A,B - the matrices in mpiaij format 5776 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5777 5778 Output Parameter: 5779 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5780 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5781 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5782 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5783 5784 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5785 for this matrix. This is not desirable.. 5786 5787 Level: developer 5788 5789 */ 5790 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5791 { 5792 PetscErrorCode ierr; 5793 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5794 Mat_SeqAIJ *b_oth; 5795 VecScatter ctx; 5796 MPI_Comm comm; 5797 const PetscMPIInt *rprocs,*sprocs; 5798 const PetscInt *srow,*rstarts,*sstarts; 5799 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5800 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5801 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5802 MPI_Request *rwaits = NULL,*swaits = NULL; 5803 MPI_Status rstatus; 5804 PetscMPIInt size,tag,rank,nsends_mpi,nrecvs_mpi; 5805 PETSC_UNUSED PetscMPIInt jj; 5806 5807 PetscFunctionBegin; 5808 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5809 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5810 5811 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5812 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5813 } 5814 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5815 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5816 5817 if (size == 1) { 5818 startsj_s = NULL; 5819 bufa_ptr = NULL; 5820 *B_oth = NULL; 5821 PetscFunctionReturn(0); 5822 } 5823 5824 ctx = a->Mvctx; 5825 tag = ((PetscObject)ctx)->tag; 5826 5827 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5828 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5829 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5830 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5831 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5832 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5833 5834 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5835 if (scall == MAT_INITIAL_MATRIX) { 5836 /* i-array */ 5837 /*---------*/ 5838 /* post receives */ 5839 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5840 for (i=0; i<nrecvs; i++) { 5841 rowlen = rvalues + rstarts[i]*rbs; 5842 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5843 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5844 } 5845 5846 /* pack 
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
    }
    for (i=0; i<nsends; i++) {
      rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
    }
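    /*
       Note on the exchange protocol used above and below: the row lengths (i-array),
       column indices (j-array), and values (a-array) are shipped in three separate
       rounds of nonblocking point-to-point messages, all driven by the send/receive
       process lists of the matrix-vector scatter. The received row lengths are turned
       into the CSR row offsets of B_oth by a running sum; e.g. (toy data, not from the
       code) received rowlen = {2,0,3} yields b_othi = {0,2,2,5}, so b_othi[aBn] is the
       total number of column indices and values still to be received.
    */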
    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
#if defined(PETSC_HAVE_DEVICE)
    (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value; scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }

  ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
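/*
   Typical usage of the routine above (a sketch, not taken from a PETSc example): a
   product algorithm first builds B_oth and keeps the communication buffers, then on
   later calls with the same nonzero pattern refreshes only the numerical values:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

   This is exactly the pattern used by MatProductSymbolic_MPIAIJBACKEND() and
   MatProductNumeric_MPIAIJBACKEND() later in this file.
*/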

/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.  A - The matrix in mpiaij format

   Output Parameters:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_CUDA)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing A*B directly with a dense A is untenable

               n                       p                          p
        [             ]       [             ]         [                 ]
      m [      A      ]  *  n [       B     ]   =   m [         C       ]
        [             ]       [             ]         [                 ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
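/*
   The identity used above: for any conforming A and B, (B^T A^T)^T = A B. A worked
   2x2 example (illustrative numbers only):

     A = [1 2; 3 4],  B = [5 6; 7 8]
     B^T A^T = [5 7; 6 8] * [1 3; 2 4] = [19 43; 22 50]
     (B^T A^T)^T = [19 22; 43 50] = A B

   This costs two explicit transposes plus one product, which is acceptable here only
   because no direct dense-times-sparse kernel is available for this combination.
*/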
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;
  PetscBool      cisdense;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/
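/*
   How the hooks above are reached (a sketch of the generic MatProduct flow through the
   public API; see MatProductCreate() for the full details):

     Mat C;
     ierr = MatProductCreate(A,B,NULL,&C);CHKERRQ(ierr);   // A is MPIDENSE, B is MPIAIJ
     ierr = MatProductSetType(C,MATPRODUCT_AB);CHKERRQ(ierr);
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);     // dispatches to the routine above
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);
     ierr = MatProductNumeric(C);CHKERRQ(ierr);
*/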

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
   MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: MatCreateAIJ()
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6227 #if defined(PETSC_HAVE_ELEMENTAL) 6228 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6229 #endif 6230 #if defined(PETSC_HAVE_SCALAPACK) 6231 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6232 #endif 6233 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6234 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6235 #if defined(PETSC_HAVE_HYPRE) 6236 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6237 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6238 #endif 6239 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6240 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6241 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6242 PetscFunctionReturn(0); 6243 } 6244 6245 /*@C 6246 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6247 and "off-diagonal" part of the matrix in CSR format. 6248 6249 Collective 6250 6251 Input Parameters: 6252 + comm - MPI communicator 6253 . m - number of local rows (Cannot be PETSC_DECIDE) 6254 . n - This value should be the same as the local size used in creating the 6255 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6256 calculated if N is given) For square matrices n is almost always m. 6257 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6258 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6259 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6260 . j - column indices 6261 . a - matrix values 6262 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6263 . oj - column indices 6264 - oa - matrix values 6265 6266 Output Parameter: 6267 . mat - the matrix 6268 6269 Level: advanced 6270 6271 Notes: 6272 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6273 must free the arrays once the matrix has been destroyed and not before. 6274 6275 The i and j indices are 0 based 6276 6277 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6278 6279 This sets local rows and cannot be used to set off-processor values. 6280 6281 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6282 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6283 not easily support in-place reassembly. 
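   For illustration only (toy data, not taken from any example): on a process owning rows 0-1 and
   columns 0-1 of a 4x4 matrix, with local entries (0,0)=1.0, (0,3)=2.0 and (1,1)=3.0, the split is
.vb
   diagonal part:      i  = {0,1,2},  j  = {0,1},  a  = {1.0,3.0}
   off-diagonal part:  oi = {0,1,1},  oj = {3},    oa = {2.0}
.ve
   where j uses column indices local to the diagonal block (here equal to the global ones,
   since this process owns the first columns) while oj uses global column numbering, matching
   how the off-diagonal block is created with B->cmap->N columns below.
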
It is recommended to use MatSetValues() (or a variant thereof) because 6284 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6285 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6286 communication if it is known that only local entries will be set. 6287 6288 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6289 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6290 @*/ 6291 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6292 { 6293 PetscErrorCode ierr; 6294 Mat_MPIAIJ *maij; 6295 6296 PetscFunctionBegin; 6297 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6298 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6299 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6300 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6301 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6302 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6303 maij = (Mat_MPIAIJ*) (*mat)->data; 6304 6305 (*mat)->preallocated = PETSC_TRUE; 6306 6307 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6308 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6309 6310 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6311 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6312 6313 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6314 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6315 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6316 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6317 6318 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6319 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6320 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6321 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6322 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6323 PetscFunctionReturn(0); 6324 } 6325 6326 /* 6327 Special version for direct calls from Fortran 6328 */ 6329 #include <petsc/private/fortranimpl.h> 6330 6331 /* Change these macros so can be used in void function */ 6332 #undef CHKERRQ 6333 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6334 #undef SETERRQ2 6335 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6336 #undef SETERRQ3 6337 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6338 #undef SETERRQ 6339 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6340 6341 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6342 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6343 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6344 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6345 #else 6346 #endif 6347 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6348 { 6349 Mat mat = *mmat; 6350 PetscInt m = *mm, n = *mn; 6351 InsertMode addv = *maddv; 6352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6353 PetscScalar value; 6354 
PetscErrorCode ierr; 6355 6356 MatCheckPreallocated(mat,1); 6357 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6358 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6359 { 6360 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6361 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6362 PetscBool roworiented = aij->roworiented; 6363 6364 /* Some Variables required in the macro */ 6365 Mat A = aij->A; 6366 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6367 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6368 MatScalar *aa = a->a; 6369 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6370 Mat B = aij->B; 6371 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6372 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6373 MatScalar *ba = b->a; 6374 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6375 * cannot use "#if defined" inside a macro. */ 6376 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6377 6378 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6379 PetscInt nonew = a->nonew; 6380 MatScalar *ap1,*ap2; 6381 6382 PetscFunctionBegin; 6383 for (i=0; i<m; i++) { 6384 if (im[i] < 0) continue; 6385 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6386 if (im[i] >= rstart && im[i] < rend) { 6387 row = im[i] - rstart; 6388 lastcol1 = -1; 6389 rp1 = aj + ai[row]; 6390 ap1 = aa + ai[row]; 6391 rmax1 = aimax[row]; 6392 nrow1 = ailen[row]; 6393 low1 = 0; 6394 high1 = nrow1; 6395 lastcol2 = -1; 6396 rp2 = bj + bi[row]; 6397 ap2 = ba + bi[row]; 6398 rmax2 = bimax[row]; 6399 nrow2 = bilen[row]; 6400 low2 = 0; 6401 high2 = nrow2; 6402 6403 for (j=0; j<n; j++) { 6404 if (roworiented) value = v[i*n+j]; 6405 else value = v[i+j*m]; 6406 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6407 if (in[j] >= cstart && in[j] < cend) { 6408 col = in[j] - cstart; 6409 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6410 #if defined(PETSC_HAVE_DEVICE) 6411 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6412 #endif 6413 } else if (in[j] < 0) continue; 6414 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6415 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6416 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6417 } else { 6418 if (mat->was_assembled) { 6419 if (!aij->colmap) { 6420 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6421 } 6422 #if defined(PETSC_USE_CTABLE) 6423 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6424 col--; 6425 #else 6426 col = aij->colmap[in[j]] - 1; 6427 #endif 6428 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6429 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6430 col = in[j]; 6431 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6432 B = aij->B; 6433 b = (Mat_SeqAIJ*)B->data; 6434 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6435 rp2 = bj + bi[row]; 6436 ap2 = ba + bi[row]; 6437 rmax2 = bimax[row]; 6438 nrow2 = bilen[row]; 6439 low2 = 
0; 6440 high2 = nrow2; 6441 bm = aij->B->rmap->n; 6442 ba = b->a; 6443 inserted = PETSC_FALSE; 6444 } 6445 } else col = in[j]; 6446 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6447 #if defined(PETSC_HAVE_DEVICE) 6448 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6449 #endif 6450 } 6451 } 6452 } else if (!aij->donotstash) { 6453 if (roworiented) { 6454 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6455 } else { 6456 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6457 } 6458 } 6459 } 6460 } 6461 PetscFunctionReturnVoid(); 6462 } 6463 6464 typedef struct { 6465 Mat *mp; /* intermediate products */ 6466 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6467 PetscInt cp; /* number of intermediate products */ 6468 6469 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6470 PetscInt *startsj_s,*startsj_r; 6471 PetscScalar *bufa; 6472 Mat P_oth; 6473 6474 /* may take advantage of merging product->B */ 6475 Mat Bloc; 6476 6477 /* cusparse does not have support to split between symbolic and numeric phases 6478 When api_user is true, we don't need to update the numerical values 6479 of the temporary storage */ 6480 PetscBool reusesym; 6481 6482 /* support for COO values insertion */ 6483 PetscScalar *coo_v,*coo_w; 6484 PetscInt **own; 6485 PetscInt **off; 6486 PetscBool hasoffproc; /* if true, non-local values insertion (i.e. AtB or PtAP) */ 6487 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6488 PetscMemType mtype; 6489 6490 /* customization */ 6491 PetscBool abmerge; 6492 PetscBool P_oth_bind; 6493 } MatMatMPIAIJBACKEND; 6494 6495 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6496 { 6497 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6498 PetscInt i; 6499 PetscErrorCode ierr; 6500 6501 PetscFunctionBegin; 6502 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6503 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6504 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6505 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6506 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6507 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6508 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6509 for (i = 0; i < mmdata->cp; i++) { 6510 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6511 } 6512 ierr = PetscFree(mmdata->mp);CHKERRQ(ierr); 6513 ierr = PetscFree(mmdata->mptmp);CHKERRQ(ierr); 6514 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6515 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6516 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6517 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6518 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6519 PetscFunctionReturn(0); 6520 } 6521 6522 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6523 { 6524 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6525 PetscErrorCode ierr; 6526 6527 PetscFunctionBegin; 6528 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6529 if (f) { 6530 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6531 } else { 6532 const PetscScalar *vv; 6533 6534 ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6535 if (n && idx) { 6536 PetscScalar *w = v; 6537 const PetscInt *oi = idx; 
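          /* gather the selected entries of the value array: w[t] = vv[idx[t]] for t = 0..n-1 */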
6538 PetscInt j; 6539 6540 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6541 } else { 6542 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6543 } 6544 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6545 } 6546 PetscFunctionReturn(0); 6547 } 6548 6549 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6550 { 6551 MatMatMPIAIJBACKEND *mmdata; 6552 PetscInt i,n_d,n_o; 6553 PetscErrorCode ierr; 6554 6555 PetscFunctionBegin; 6556 MatCheckProduct(C,1); 6557 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6558 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6559 if (!mmdata->reusesym) { /* update temporary matrices */ 6560 if (mmdata->P_oth) { 6561 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6562 } 6563 if (mmdata->Bloc) { 6564 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6565 } 6566 } 6567 mmdata->reusesym = PETSC_FALSE; 6568 6569 for (i = 0; i < mmdata->cp; i++) { 6570 if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6571 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6572 } 6573 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6574 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6575 6576 if (mmdata->mptmp[i]) continue; 6577 if (noff) { 6578 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6579 6580 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6581 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6582 n_o += noff; 6583 n_d += nown; 6584 } else { 6585 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6586 6587 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6588 n_d += mm->nz; 6589 } 6590 } 6591 if (mmdata->hasoffproc) { /* offprocess insertion */ 6592 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6593 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6594 } 6595 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6596 PetscFunctionReturn(0); 6597 } 6598 6599 /* Support for Pt * A, A * P, or Pt * A * P */ 6600 #define MAX_NUMBER_INTERMEDIATE 4 6601 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6602 { 6603 Mat_Product *product = C->product; 6604 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; 6605 Mat_MPIAIJ *a,*p; 6606 MatMatMPIAIJBACKEND *mmdata; 6607 ISLocalToGlobalMapping P_oth_l2g = NULL; 6608 IS glob = NULL; 6609 const char *prefix; 6610 char pprefix[256]; 6611 const PetscInt *globidx,*P_oth_idx; 6612 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; 6613 PetscInt cp = 0,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j,cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE],i,j; 6614 MatProductType ptype; 6615 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6616 PetscMPIInt size; 6617 PetscErrorCode ierr; 6618 6619 PetscFunctionBegin; 6620 MatCheckProduct(C,1); 6621 if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6622 ptype = product->type; 6623 if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = 
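    /* for symmetric A, A^T B equals A B, so the cheaper AB decomposition below can be used */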
MATPRODUCT_AB; 6624 switch (ptype) { 6625 case MATPRODUCT_AB: 6626 A = product->A; 6627 P = product->B; 6628 m = A->rmap->n; 6629 n = P->cmap->n; 6630 M = A->rmap->N; 6631 N = P->cmap->N; 6632 break; 6633 case MATPRODUCT_AtB: 6634 P = product->A; 6635 A = product->B; 6636 m = P->cmap->n; 6637 n = A->cmap->n; 6638 M = P->cmap->N; 6639 N = A->cmap->N; 6640 hasoffproc = PETSC_TRUE; 6641 break; 6642 case MATPRODUCT_PtAP: 6643 A = product->A; 6644 P = product->B; 6645 m = P->cmap->n; 6646 n = P->cmap->n; 6647 M = P->cmap->N; 6648 N = P->cmap->N; 6649 hasoffproc = PETSC_TRUE; 6650 break; 6651 default: 6652 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6653 } 6654 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRQ(ierr); 6655 if (size == 1) hasoffproc = PETSC_FALSE; 6656 6657 /* defaults */ 6658 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6659 mp[i] = NULL; 6660 mptmp[i] = PETSC_FALSE; 6661 rmapt[i] = -1; 6662 cmapt[i] = -1; 6663 rmapa[i] = NULL; 6664 cmapa[i] = NULL; 6665 } 6666 6667 /* customization */ 6668 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 6669 mmdata->reusesym = product->api_user; 6670 if (ptype == MATPRODUCT_AB) { 6671 if (product->api_user) { 6672 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6673 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6674 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6675 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6676 } else { 6677 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6678 ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6679 ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6680 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6681 } 6682 } else if (ptype == MATPRODUCT_PtAP) { 6683 if (product->api_user) { 6684 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6685 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6686 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6687 } else { 6688 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6689 ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6690 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6691 } 6692 } 6693 a = (Mat_MPIAIJ*)A->data; 6694 p = (Mat_MPIAIJ*)P->data; 6695 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 6696 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 6697 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 6698 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6699 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 6700 switch (ptype) { 6701 case MATPRODUCT_AB: /* A * P */ 6702 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6703 6704 if 
(mmdata->abmerge) { /* A_diag * P_loc and A_off * P_oth */ 6705 /* P is product->B */ 6706 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6707 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6708 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6709 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6710 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6711 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6712 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6713 mp[cp]->product->api_user = product->api_user; 6714 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6715 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6716 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6717 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6718 rmapt[cp] = 1; 6719 cmapt[cp] = 2; 6720 cmapa[cp] = globidx; 6721 mptmp[cp] = PETSC_FALSE; 6722 cp++; 6723 } else { /* A_diag * P_diag and A_diag * P_off and A_off * P_oth */ 6724 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 6725 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6726 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6727 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6728 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6729 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6730 mp[cp]->product->api_user = product->api_user; 6731 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6732 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6733 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6734 rmapt[cp] = 1; 6735 cmapt[cp] = 1; 6736 mptmp[cp] = PETSC_FALSE; 6737 cp++; 6738 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 6739 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6740 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6741 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6742 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6743 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6744 mp[cp]->product->api_user = product->api_user; 6745 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6746 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6747 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6748 rmapt[cp] = 1; 6749 cmapt[cp] = 2; 6750 cmapa[cp] = p->garray; 6751 mptmp[cp] = PETSC_FALSE; 6752 cp++; 6753 } 6754 if (mmdata->P_oth) { 6755 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6756 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6757 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6758 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6759 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6760 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6761 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6762 ierr = 
PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6763 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6764 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6765 mp[cp]->product->api_user = product->api_user; 6766 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6767 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6768 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6769 rmapt[cp] = 1; 6770 cmapt[cp] = 2; 6771 cmapa[cp] = P_oth_idx; 6772 mptmp[cp] = PETSC_FALSE; 6773 cp++; 6774 } 6775 break; 6776 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 6777 /* A is product->B */ 6778 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6779 if (A == P) { 6780 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6781 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6782 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6783 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6784 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6785 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6786 mp[cp]->product->api_user = product->api_user; 6787 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6788 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6789 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6790 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6791 rmapt[cp] = 2; 6792 rmapa[cp] = globidx; 6793 cmapt[cp] = 2; 6794 cmapa[cp] = globidx; 6795 mptmp[cp] = PETSC_FALSE; 6796 cp++; 6797 } else { 6798 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6799 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6800 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6801 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6802 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6803 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6804 mp[cp]->product->api_user = product->api_user; 6805 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6806 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6807 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6808 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6809 rmapt[cp] = 1; 6810 cmapt[cp] = 2; 6811 cmapa[cp] = globidx; 6812 mptmp[cp] = PETSC_FALSE; 6813 cp++; 6814 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6815 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6816 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6817 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6818 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6819 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6820 mp[cp]->product->api_user = product->api_user; 6821 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6822 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6823 ierr = 
(*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6824 rmapt[cp] = 2; 6825 rmapa[cp] = p->garray; 6826 cmapt[cp] = 2; 6827 cmapa[cp] = globidx; 6828 mptmp[cp] = PETSC_FALSE; 6829 cp++; 6830 } 6831 break; 6832 case MATPRODUCT_PtAP: 6833 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6834 /* P is product->B */ 6835 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6836 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6837 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 6838 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6839 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6840 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6841 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6842 mp[cp]->product->api_user = product->api_user; 6843 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6844 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6845 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6846 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6847 rmapt[cp] = 2; 6848 rmapa[cp] = globidx; 6849 cmapt[cp] = 2; 6850 cmapa[cp] = globidx; 6851 mptmp[cp] = PETSC_FALSE; 6852 cp++; 6853 if (mmdata->P_oth) { 6854 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6855 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6856 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6857 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6858 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6859 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6860 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6861 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6862 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6863 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6864 mp[cp]->product->api_user = product->api_user; 6865 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6866 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6867 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6868 mptmp[cp] = PETSC_TRUE; 6869 cp++; 6870 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 6871 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6872 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6873 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6874 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6875 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6876 mp[cp]->product->api_user = product->api_user; 6877 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6878 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6879 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6880 rmapt[cp] = 2; 6881 rmapa[cp] = globidx; 6882 cmapt[cp] = 2; 6883 cmapa[cp] = P_oth_idx; 6884 mptmp[cp] = PETSC_FALSE; 6885 cp++; 6886 } 
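    /*
       Summary of the PtAP splitting built above (P_loc is the merged local part of P):
         C = P_loc^T * A_diag * P_loc          (mp[0], a PtAP product)
           + P_loc^T * (A_off * P_oth)         (mp[1] is the temporary A_off*P_oth; mp[2] = P_loc^T * mp[1])
       with rmapt/cmapt and rmapa/cmapa recording how each local product scatters into the global C.
    */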
6887 break; 6888 default: 6889 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6890 } 6891 /* sanity check */ 6892 if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i); 6893 6894 ierr = PetscMalloc1(cp,&mmdata->mp);CHKERRQ(ierr); 6895 for (i = 0; i < cp; i++) mmdata->mp[i] = mp[i]; 6896 ierr = PetscMalloc1(cp,&mmdata->mptmp);CHKERRQ(ierr); 6897 for (i = 0; i < cp; i++) mmdata->mptmp[i] = mptmp[i]; 6898 mmdata->cp = cp; 6899 C->product->data = mmdata; 6900 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 6901 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 6902 6903 /* memory type */ 6904 mmdata->mtype = PETSC_MEMTYPE_HOST; 6905 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 6906 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 6907 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 6908 // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented 6909 //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE; 6910 6911 /* prepare coo coordinates for values insertion */ 6912 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 6913 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6914 if (mptmp[cp]) continue; 6915 if (rmapt[cp] == 2 && hasoffproc) { 6916 const PetscInt *rmap = rmapa[cp]; 6917 const PetscInt mr = mp[cp]->rmap->n; 6918 const PetscInt rs = C->rmap->rstart; 6919 const PetscInt re = C->rmap->rend; 6920 const PetscInt *ii = mm->i; 6921 for (i = 0; i < mr; i++) { 6922 const PetscInt gr = rmap[i]; 6923 const PetscInt nz = ii[i+1] - ii[i]; 6924 if (gr < rs || gr >= re) ncoo_o += nz; 6925 else ncoo_oown += nz; 6926 } 6927 } else ncoo_d += mm->nz; 6928 } 6929 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); 6930 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 6931 if (hasoffproc) { /* handle offproc values insertion */ 6932 PetscSF msf; 6933 PetscInt ncoo2,*coo_i2,*coo_j2; 6934 6935 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 6936 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 6937 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); 6938 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 6939 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6940 PetscInt *idxoff = mmdata->off[cp]; 6941 PetscInt *idxown = mmdata->own[cp]; 6942 if (!mptmp[cp] && rmapt[cp] == 2) { 6943 const PetscInt *rmap = rmapa[cp]; 6944 const PetscInt *cmap = cmapa[cp]; 6945 const PetscInt *ii = mm->i; 6946 PetscInt *coi = coo_i + ncoo_o; 6947 PetscInt *coj = coo_j + ncoo_o; 6948 const PetscInt mr = mp[cp]->rmap->n; 6949 const PetscInt rs = C->rmap->rstart; 6950 const PetscInt re = C->rmap->rend; 6951 const PetscInt cs = C->cmap->rstart; 6952 for (i = 0; i < mr; i++) { 6953 const PetscInt *jj = mm->j + ii[i]; 6954 const PetscInt gr = rmap[i]; 6955 const PetscInt nz = ii[i+1] - ii[i]; 6956 if (gr < rs || gr >= re) { 6957 for (j = ii[i]; j < ii[i+1]; j++) { 6958 *coi++ = gr; 6959 *idxoff++ = j; 6960 } 6961 if (!cmapt[cp]) { /* already global */ 6962 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6963 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6964 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6965 } else { /* offdiag */ 6966 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6967 } 6968 ncoo_o += nz; 6969 } 
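            /* row gr is owned by this process: only record the source positions, the values go straight into coo_v */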
else { 6970 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 6971 } 6972 } 6973 } 6974 mmdata->off[cp + 1] = idxoff; 6975 mmdata->own[cp + 1] = idxown; 6976 } 6977 6978 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6979 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o,NULL,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 6980 ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 6981 ierr = PetscSFGetGraph(msf,&ncoo2,NULL,NULL,NULL);CHKERRQ(ierr); 6982 ncoo = ncoo_d + ncoo_oown + ncoo2; 6983 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 6984 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6985 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6986 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6987 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6988 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6989 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 6990 coo_i = coo_i2; 6991 coo_j = coo_j2; 6992 } else { /* no offproc values insertion */ 6993 ncoo = ncoo_d; 6994 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 6995 6996 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6997 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 6998 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 6999 } 7000 mmdata->hasoffproc = hasoffproc; 7001 7002 /* on-process indices */ 7003 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7004 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7005 PetscInt *coi = coo_i + ncoo_d; 7006 PetscInt *coj = coo_j + ncoo_d; 7007 const PetscInt *jj = mm->j; 7008 const PetscInt *ii = mm->i; 7009 const PetscInt *cmap = cmapa[cp]; 7010 const PetscInt *rmap = rmapa[cp]; 7011 const PetscInt mr = mp[cp]->rmap->n; 7012 const PetscInt rs = C->rmap->rstart; 7013 const PetscInt re = C->rmap->rend; 7014 const PetscInt cs = C->cmap->rstart; 7015 7016 if (mptmp[cp]) continue; 7017 if (rmapt[cp] == 1) { 7018 for (i = 0; i < mr; i++) { 7019 const PetscInt gr = i + rs; 7020 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7021 } 7022 /* columns coo */ 7023 if (!cmapt[cp]) { 7024 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 7025 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7026 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; 7027 } else { /* offdiag */ 7028 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7029 } 7030 ncoo_d += mm->nz; 7031 } else if (rmapt[cp] == 2) { 7032 for (i = 0; i < mr; i++) { 7033 const PetscInt *jj = mm->j + ii[i]; 7034 const PetscInt gr = rmap[i]; 7035 const PetscInt nz = ii[i+1] - ii[i]; 7036 if (gr >= rs && gr < re) { 7037 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7038 if (!cmapt[cp]) { /* already global */ 7039 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7040 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7041 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7042 } else { /* offdiag */ 7043 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7044 } 7045 ncoo_d += nz; 7046 } 7047 } 7048 } 7049 } 7050 if (glob) { 7051 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 7052 } 7053 ierr = ISDestroy(&glob);CHKERRQ(ierr); 7054 if (P_oth_l2g) { 7055 ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 7056 } 
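  /*
     The remainder of the symbolic phase follows the generic COO assembly pattern: the
     (i,j) coordinates collected above are handed to MatSetPreallocationCOO() once, and
     each numeric phase then only fills the matching value array, as in this sketch:

       ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);   // symbolic, once
       ierr = MatSetValuesCOO(C,coo_v,INSERT_VALUES);CHKERRQ(ierr);       // numeric, every product
  */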
7057 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 7058 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 7059 7060 /* preallocate with COO data */ 7061 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 7062 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 7063 PetscFunctionReturn(0); 7064 } 7065 7066 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7067 { 7068 Mat_Product *product = mat->product; 7069 PetscErrorCode ierr; 7070 #if defined(PETSC_HAVE_DEVICE) 7071 PetscBool match = PETSC_FALSE; 7072 PetscBool usecpu = PETSC_FALSE; 7073 #else 7074 PetscBool match = PETSC_TRUE; 7075 #endif 7076 7077 PetscFunctionBegin; 7078 MatCheckProduct(mat,1); 7079 #if defined(PETSC_HAVE_DEVICE) 7080 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7081 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 7082 } 7083 if (match) { /* we can always fallback to CPU in case an operation is not performing on the device */ 7084 switch (product->type) { 7085 case MATPRODUCT_AB: 7086 if (product->api_user) { 7087 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 7088 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7089 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7090 } else { 7091 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7092 ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7093 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7094 } 7095 break; 7096 case MATPRODUCT_AtB: 7097 if (product->api_user) { 7098 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 7099 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7100 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7101 } else { 7102 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 7103 ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7104 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7105 } 7106 break; 7107 case MATPRODUCT_PtAP: 7108 if (product->api_user) { 7109 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7110 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7111 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7112 } else { 7113 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7114 ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7115 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7116 } 7117 break; 7118 default: 7119 break; 7120 } 7121 match = (PetscBool)!usecpu; 7122 } 7123 #endif 7124 if (match) { 7125 switch (product->type) { 7126 case MATPRODUCT_AB: 7127 case MATPRODUCT_AtB: 7128 case MATPRODUCT_PtAP: 7129 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7130 break; 7131 default: 7132 break; 7133 } 7134 } 7135 /* fallback to MPIAIJ ops */ 7136 if 
(!mat->ops->productsymbolic) { 7137 ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7138 } 7139 PetscFunctionReturn(0); 7140 } 7141
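/*
   End-to-end sketch of how the backend above is exercised from user code (standard
   public API only; the option names are those registered in
   MatProductSetFromOptions_MPIAIJBACKEND above):

     Mat C;
     ierr = MatMatMult(A,P,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);  // symbolic + numeric
     ierr = MatMatMult(A,P,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);    // numeric only

   and, for example, -matmatmult_backend_cpu forces the CPU fallback while
   -matmatmult_backend_mergeB selects the merged-local-matrix variant of the AB algorithm.
*/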