1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/vecscatterimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 67 { 68 PetscErrorCode ierr; 69 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 70 71 PetscFunctionBegin; 72 if (mat->A) { 73 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 74 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 75 } 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 80 { 81 PetscErrorCode ierr; 82 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 84 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 85 const PetscInt *ia,*ib; 86 const MatScalar *aa,*bb; 87 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 88 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 89 90 PetscFunctionBegin; 91 *keptrows = 0; 92 ia = a->i; 93 ib = b->i; 94 for (i=0; i<m; i++) { 95 na = ia[i+1] - ia[i]; 96 nb = ib[i+1] - ib[i]; 97 if (!na && !nb) { 98 cnt++; 99 goto ok1; 100 } 101 aa = a->a + ia[i]; 102 for (j=0; j<na; j++) { 103 if (aa[j] != 0.0) goto ok1; 104 } 105 bb = b->a + ib[i]; 106 for (j=0; j <nb; j++) { 107 if (bb[j] != 0.0) goto ok1; 108 } 109 cnt++; 110 ok1:; 111 } 112 ierr = 
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 
299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 
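  /* Note on the copy loops below: the values received for each local row arrive contiguously and ordered by
     global column, so each row splits into three pieces: the ld[i] entries whose global column lies to the
     left of this process's diagonal block (stored in the off-diagonal part B), then the diagonal-block
     entries (stored in A), then the remaining entries to the right of the diagonal block (also stored in B).
     The loops copy these pieces in a staggered fashion, combining the trailing B piece of one row with the
     leading B piece of the next. */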
394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 395 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 396 ad = Ad->a; 397 ao = Ao->a; 398 if (mat->rmap->n) { 399 i = 0; 400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 402 } 403 for (i=1; i<mat->rmap->n; i++) { 404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 406 } 407 i--; 408 if (mat->rmap->n) { 409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 410 } 411 if (rank) { 412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 413 } 414 } 415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Local utility routine that creates a mapping from the global column 422 number to the local number in the off-diagonal part of the local 423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 424 a slightly higher hash table cost; without it, it is not scalable (each processor 425 has an order N integer array, but access is fast). 426 */ 427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 428 { 429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 430 PetscErrorCode ierr; 431 PetscInt n = aij->B->cmap->n,i; 432 433 PetscFunctionBegin; 434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 435 #if defined(PETSC_USE_CTABLE) 436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 437 for (i=0; i<n; i++) { 438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 439 } 440 #else 441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 444 #endif 445 PetscFunctionReturn(0); 446 } 447 448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 449 { \ 450 if (col <= lastcol1) low1 = 0; \ 451 else high1 = nrow1; \ 452 lastcol1 = col;\ 453 while (high1-low1 > 5) { \ 454 t = (low1+high1)/2; \ 455 if (rp1[t] > col) high1 = t; \ 456 else low1 = t; \ 457 } \ 458 for (_i=low1; _i<high1; _i++) { \ 459 if (rp1[_i] > col) break; \ 460 if (rp1[_i] == col) { \ 461 if (addv == ADD_VALUES) { \ 462 ap1[_i] += value; \ 463 /* Not sure whether LogFlops will slow down the code or not */ \ 464 (void)PetscLogFlops(1.0); \ 465 } \ 466 else ap1[_i] = value; \ 467 inserted = PETSC_TRUE; \ 468 goto a_noinsert; \ 469 } \ 470 } \ 471 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 472 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 473 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 474 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 475 N = nrow1++ - 1; a->nz++; high1++; \ 476 /* shift up all the later entries in this row */ \ 477 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 478 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 479 rp1[_i] = col; 
\ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable 
below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 #if defined(PETSC_USE_DEBUG) 583 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 584 #endif 585 if (im[i] >= rstart && im[i] < rend) { 586 row = im[i] - rstart; 587 lastcol1 = -1; 588 rp1 = aj + ai[row]; 589 ap1 = aa + ai[row]; 590 rmax1 = aimax[row]; 591 nrow1 = ailen[row]; 592 low1 = 0; 593 high1 = nrow1; 594 lastcol2 = -1; 595 rp2 = bj + bi[row]; 596 ap2 = ba + bi[row]; 597 rmax2 = bimax[row]; 598 nrow2 = bilen[row]; 599 low2 = 0; 600 high2 = nrow2; 601 602 for (j=0; j<n; j++) { 603 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 604 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 605 if (in[j] >= cstart && in[j] < cend) { 606 col = in[j] - cstart; 607 nonew = a->nonew; 608 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 609 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 610 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 611 #endif 612 } else if (in[j] < 0) continue; 613 #if defined(PETSC_USE_DEBUG) 614 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 615 #endif 616 else { 617 if (mat->was_assembled) { 618 if (!aij->colmap) { 619 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 620 } 621 #if defined(PETSC_USE_CTABLE) 622 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 623 col--; 624 #else 625 col = aij->colmap[in[j]] - 1; 626 #endif 627 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 628 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 629 col = in[j]; 630 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 631 B = aij->B; 632 b = (Mat_SeqAIJ*)B->data; 633 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 634 rp2 = bj + bi[row]; 635 ap2 = ba + bi[row]; 636 rmax2 = bimax[row]; 637 nrow2 = bilen[row]; 638 low2 = 0; 639 high2 = nrow2; 640 bm = aij->B->rmap->n; 641 ba = b->a; 642 inserted = PETSC_FALSE; 643 } else if (col < 0) { 644 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 645 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 646 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 647 } 648 } else col = in[j]; 649 nonew = b->nonew; 650 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 651 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 652 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 653 #endif 654 } 655 } 656 } else { 657 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 658 if (!aij->donotstash) { 659 mat->assembled = PETSC_FALSE; 660 if (roworiented) { 661 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 662 } else { 663 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 664 } 665 } 666 } 667 } 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 673 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 674 No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE. 675 */ 676 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 677 { 678 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 679 Mat A = aij->A; /* diagonal part of the matrix */ 680 Mat B = aij->B; /* offdiagonal part of the matrix */ 681 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 682 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 683 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 684 PetscInt *ailen = a->ilen,*aj = a->j; 685 PetscInt *bilen = b->ilen,*bj = b->j; 686 PetscInt am = aij->A->rmap->n,j; 687 PetscInt diag_so_far = 0,dnz; 688 PetscInt offd_so_far = 0,onz; 689 690 PetscFunctionBegin; 691 /* Iterate over all rows of the matrix */ 692 for (j=0; j<am; j++) { 693 dnz = onz = 0; 694 /* Iterate over all non-zero columns of the current row */ 695 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 696 /* If column is in the diagonal */ 697 if (mat_j[col] >= cstart && mat_j[col] < cend) { 698 aj[diag_so_far++] = mat_j[col] - cstart; 699 dnz++; 700 } else { /* off-diagonal entries */ 701 bj[offd_so_far++] = mat_j[col]; 702 onz++; 703 } 704 } 705 ailen[j] = dnz; 706 bilen[j] = onz; 707 } 708 PetscFunctionReturn(0); 709 } 710 711 /* 712 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 713 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 714 No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ. 715 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 716 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 717 */ 718 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 Mat A = aij->A; /* diagonal part of the matrix */ 722 Mat B = aij->B; /* offdiagonal part of the matrix */ 723 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 724 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 725 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 726 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 727 PetscInt *ailen = a->ilen,*aj = a->j; 728 PetscInt *bilen = b->ilen,*bj = b->j; 729 PetscInt am = aij->A->rmap->n,j; 730 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 731 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 732 PetscScalar *aa = a->a,*ba = b->a; 733 734 PetscFunctionBegin; 735 /* Iterate over all rows of the matrix */ 736 for (j=0; j<am; j++) { 737 dnz_row = onz_row = 0; 738 rowstart_offd = full_offd_i[j]; 739 rowstart_diag = full_diag_i[j]; 740 /* Iterate over all non-zero columns of the current row */ 741 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 742 /* If column is in the diagonal */ 743 if (mat_j[col] >= cstart && mat_j[col] < cend) { 744 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 745 aa[rowstart_diag+dnz_row] = mat_a[col]; 746 dnz_row++; 747 } else { /* off-diagonal entries */ 748 bj[rowstart_offd+onz_row] = mat_j[col]; 749 ba[rowstart_offd+onz_row] = mat_a[col]; 750 onz_row++; 751 } 752 } 753 ailen[j] = dnz_row; 754 bilen[j] = onz_row; 755 } 756 PetscFunctionReturn(0); 757 } 758 759 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 760 { 761 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 762 PetscErrorCode ierr; 763 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 764 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 765 766 PetscFunctionBegin; 767 for (i=0; i<m; i++) { 768 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 769 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 770 if (idxm[i] >= rstart && idxm[i] < rend) { 771 row = idxm[i] - rstart; 772 for (j=0; j<n; j++) { 773 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 774 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 775 if (idxn[j] >= cstart && idxn[j] < cend) { 776 col = idxn[j] - cstart; 777 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 778 } else { 779 if (!aij->colmap) { 780 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 781 } 782 #if defined(PETSC_USE_CTABLE) 783 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 784 col--; 785 #else 786 col = aij->colmap[idxn[j]] - 1; 787 #endif 788 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 789 else { 790 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 791 } 792 } 793 } 794 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 795 } 796 PetscFunctionReturn(0); 797 } 798 799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 800 801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 804 PetscErrorCode ierr; 805 PetscInt nstash,reallocs; 806 807 PetscFunctionBegin; 808 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 809 810 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 811 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 812 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 813 PetscFunctionReturn(0); 814 } 815 816 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 817 { 818 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 819 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 820 PetscErrorCode ierr; 821 PetscMPIInt n; 822 PetscInt i,j,rstart,ncols,flg; 823 PetscInt *row,*col; 824 
PetscBool other_disassembled; 825 PetscScalar *val; 826 827 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 828 829 PetscFunctionBegin; 830 if (!aij->donotstash && !mat->nooffprocentries) { 831 while (1) { 832 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 833 if (!flg) break; 834 835 for (i=0; i<n; ) { 836 /* Now identify the consecutive vals belonging to the same row */ 837 for (j=i,rstart=row[j]; j<n; j++) { 838 if (row[j] != rstart) break; 839 } 840 if (j < n) ncols = j-i; 841 else ncols = n-i; 842 /* Now assemble all these values with a single function call */ 843 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 844 845 i = j; 846 } 847 } 848 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 849 } 850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 851 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 852 #endif 853 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 854 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 855 856 /* determine if any processor has disassembled, if so we must 857 also disassemble ourself, in order that we may reassemble. */ 858 /* 859 if nonzero structure of submatrix B cannot change then we know that 860 no processor disassembled thus we can skip this stuff 861 */ 862 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 863 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 864 if (mat->was_assembled && !other_disassembled) { 865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 866 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 867 #endif 868 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 869 } 870 } 871 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 872 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 873 } 874 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 876 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 877 #endif 878 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 879 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 880 881 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 882 883 aij->rowvalues = 0; 884 885 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 886 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 887 888 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 889 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 890 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 891 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 892 } 893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 894 mat->offloadmask = PETSC_OFFLOAD_BOTH; 895 #endif 896 PetscFunctionReturn(0); 897 } 898 899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 900 { 901 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 906 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 PetscErrorCode MatZeroRows_MPIAIJ(Mat 
A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 911 { 912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 913 PetscObjectState sA, sB; 914 PetscInt *lrows; 915 PetscInt r, len; 916 PetscBool cong, lch, gch; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 /* get locally owned rows */ 921 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 922 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 923 /* fix right hand side if needed */ 924 if (x && b) { 925 const PetscScalar *xx; 926 PetscScalar *bb; 927 928 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 929 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 930 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 931 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 932 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 933 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 934 } 935 936 sA = mat->A->nonzerostate; 937 sB = mat->B->nonzerostate; 938 939 if (diag != 0.0 && cong) { 940 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 941 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 942 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 943 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 944 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 945 PetscInt nnwA, nnwB; 946 PetscBool nnzA, nnzB; 947 948 nnwA = aijA->nonew; 949 nnwB = aijB->nonew; 950 nnzA = aijA->keepnonzeropattern; 951 nnzB = aijB->keepnonzeropattern; 952 if (!nnzA) { 953 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 954 aijA->nonew = 0; 955 } 956 if (!nnzB) { 957 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 958 aijB->nonew = 0; 959 } 960 /* Must zero here before the next loop */ 961 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 962 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 963 for (r = 0; r < len; ++r) { 964 const PetscInt row = lrows[r] + A->rmap->rstart; 965 if (row >= A->cmap->N) continue; 966 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 967 } 968 aijA->nonew = nnwA; 969 aijB->nonew = nnwB; 970 } else { 971 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 972 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 973 } 974 ierr = PetscFree(lrows);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 978 /* reduce nonzerostate */ 979 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 980 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 981 if (gch) A->nonzerostate++; 982 PetscFunctionReturn(0); 983 } 984 985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 986 { 987 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 988 PetscErrorCode ierr; 989 PetscMPIInt n = A->rmap->n; 990 PetscInt i,j,r,m,len = 0; 991 PetscInt *lrows,*owners = A->rmap->range; 992 PetscMPIInt p = 0; 993 PetscSFNode *rrows; 994 PetscSF sf; 995 const PetscScalar *xx; 996 PetscScalar *bb,*mask; 997 Vec xmask,lmask; 998 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 
999 const PetscInt *aj, *ii,*ridx; 1000 PetscScalar *aa; 1001 1002 PetscFunctionBegin; 1003 /* Create SF where leaves are input rows and roots are owned rows */ 1004 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1005 for (r = 0; r < n; ++r) lrows[r] = -1; 1006 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1007 for (r = 0; r < N; ++r) { 1008 const PetscInt idx = rows[r]; 1009 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1010 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1011 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1012 } 1013 rrows[r].rank = p; 1014 rrows[r].index = rows[r] - owners[p]; 1015 } 1016 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1017 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1018 /* Collect flags for rows to be zeroed */ 1019 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1020 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1021 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1022 /* Compress and put in row numbers */ 1023 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1024 /* zero diagonal part of matrix */ 1025 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1026 /* handle off diagonal part of matrix */ 1027 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1028 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1029 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1030 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1031 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1032 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1035 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1036 PetscBool cong; 1037 1038 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1039 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1040 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1043 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1044 } 1045 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1046 /* remove zeroed rows of off diagonal matrix */ 1047 ii = aij->i; 1048 for (i=0; i<len; i++) { 1049 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1050 } 1051 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1052 if (aij->compressedrow.use) { 1053 m = aij->compressedrow.nrows; 1054 ii = aij->compressedrow.i; 1055 ridx = aij->compressedrow.rindex; 1056 for (i=0; i<m; i++) { 1057 n = ii[i+1] - ii[i]; 1058 aj = aij->j + ii[i]; 1059 aa = aij->a + ii[i]; 1060 1061 for (j=0; j<n; j++) { 1062 if (PetscAbsScalar(mask[*aj])) { 1063 if (b) bb[*ridx] -= *aa*xx[*aj]; 1064 *aa = 0.0; 1065 } 1066 aa++; 1067 aj++; 1068 } 1069 ridx++; 1070 } 1071 } else { /* do not use compressed row format */ 1072 m = l->B->rmap->n; 1073 for (i=0; i<m; i++) { 1074 n = ii[i+1] - ii[i]; 1075 aj = aij->j + ii[i]; 1076 aa = aij->a + ii[i]; 1077 for (j=0; j<n; j++) { 1078 if 
(PetscAbsScalar(mask[*aj])) { 1079 if (b) bb[i] -= *aa*xx[*aj]; 1080 *aa = 0.0; 1081 } 1082 aa++; 1083 aj++; 1084 } 1085 } 1086 } 1087 if (x && b) { 1088 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1089 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1090 } 1091 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1092 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1093 ierr = PetscFree(lrows);CHKERRQ(ierr); 1094 1095 /* only change matrix nonzero state if pattern was allowed to be changed */ 1096 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1097 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1098 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 PetscInt nt; 1108 VecScatter Mvctx = a->Mvctx; 1109 1110 PetscFunctionBegin; 1111 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1112 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1113 1114 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1116 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1122 { 1123 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1132 { 1133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 VecScatter Mvctx = a->Mvctx; 1136 1137 PetscFunctionBegin; 1138 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1139 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1141 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1147 { 1148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1149 PetscErrorCode ierr; 1150 1151 PetscFunctionBegin; 1152 /* do nondiagonal part */ 1153 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1154 /* do local part */ 1155 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1156 /* add partial results together */ 1157 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1163 { 1164 MPI_Comm comm; 1165 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1166 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1167 IS Me,Notme; 1168 PetscErrorCode ierr; 1169 PetscInt M,N,first,last,*notme,i; 1170 PetscBool lf; 1171 PetscMPIInt size; 1172 1173 PetscFunctionBegin; 1174 /* Easy test: symmetric diagonal block */ 1175 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1176 ierr = 
MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1177 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1178 if (!*f) PetscFunctionReturn(0); 1179 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1180 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1181 if (size == 1) PetscFunctionReturn(0); 1182 1183 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1184 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1185 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1186 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1187 for (i=0; i<first; i++) notme[i] = i; 1188 for (i=last; i<M; i++) notme[i-last+first] = i; 1189 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1190 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1191 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1192 Aoff = Aoffs[0]; 1193 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1194 Boff = Boffs[0]; 1195 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1196 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1197 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1198 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1199 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1200 ierr = PetscFree(notme);CHKERRQ(ierr); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1205 { 1206 PetscErrorCode ierr; 1207 1208 PetscFunctionBegin; 1209 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1214 { 1215 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1216 PetscErrorCode ierr; 1217 1218 PetscFunctionBegin; 1219 /* do nondiagonal part */ 1220 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1221 /* do local part */ 1222 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1223 /* add partial results together */ 1224 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 /* 1230 This only works correctly for square matrices where the subblock A->A is the 1231 diagonal block 1232 */ 1233 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1234 { 1235 PetscErrorCode ierr; 1236 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1237 1238 PetscFunctionBegin; 1239 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1240 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1241 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1246 { 1247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1248 PetscErrorCode ierr; 1249 1250 PetscFunctionBegin; 1251 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1252 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1257 { 1258 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1259 PetscErrorCode ierr; 1260 1261 PetscFunctionBegin; 1262 #if defined(PETSC_USE_LOG) 1263 
PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1264 #endif 1265 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1266 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1267 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1268 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1269 #if defined(PETSC_USE_CTABLE) 1270 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1271 #else 1272 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1273 #endif 1274 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1275 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1276 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1277 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1278 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1279 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1280 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1281 1282 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1292 #if defined(PETSC_HAVE_ELEMENTAL) 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1294 #endif 1295 #if defined(PETSC_HAVE_HYPRE) 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1298 #endif 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1305 { 1306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1307 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1308 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1309 PetscErrorCode ierr; 1310 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1311 int fd; 1312 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1313 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1314 PetscScalar *column_values; 1315 PetscInt message_count,flowcontrolcount; 1316 FILE *file; 1317 1318 PetscFunctionBegin; 1319 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1320 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1321 nz = A->nz + B->nz; 1322 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1323 if (!rank) { 1324 header[0] = MAT_FILE_CLASSID; 1325 header[1] = mat->rmap->N; 1326 header[2] = mat->cmap->N; 1327 1328 
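  /* header layout: [MAT_FILE_CLASSID, global rows, global columns, total nonzeros]; header[3] is obtained
     on rank 0 by summing the per-process nonzero counts with the reduction below */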
ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1329 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1330 /* get largest number of rows any processor has */ 1331 rlen = mat->rmap->n; 1332 range = mat->rmap->range; 1333 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1334 } else { 1335 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1336 rlen = mat->rmap->n; 1337 } 1338 1339 /* load up the local row counts */ 1340 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1341 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1342 1343 /* store the row lengths to the file */ 1344 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1345 if (!rank) { 1346 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1347 for (i=1; i<size; i++) { 1348 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1349 rlen = range[i+1] - range[i]; 1350 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1351 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1352 } 1353 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1354 } else { 1355 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1356 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1357 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1358 } 1359 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1360 1361 /* load up the local column indices */ 1362 nzmax = nz; /* this processor needs as much space as the largest processor needs */ 1363 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1364 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1365 cnt = 0; 1366 for (i=0; i<mat->rmap->n; i++) { 1367 for (j=B->i[i]; j<B->i[i+1]; j++) { 1368 if ((col = garray[B->j[j]]) > cstart) break; 1369 column_indices[cnt++] = col; 1370 } 1371 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1372 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1373 } 1374 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1375 1376 /* store the column indices to the file */ 1377 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1378 if (!rank) { 1379 MPI_Status status; 1380 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1381 for (i=1; i<size; i++) { 1382 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1383 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1384 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1385 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1386 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1387 } 1388 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1389 } else { 1390 ierr = 
PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1391 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1392 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1393 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1394 } 1395 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1396 1397 /* load up the local column values */ 1398 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1399 cnt = 0; 1400 for (i=0; i<mat->rmap->n; i++) { 1401 for (j=B->i[i]; j<B->i[i+1]; j++) { 1402 if (garray[B->j[j]] > cstart) break; 1403 column_values[cnt++] = B->a[j]; 1404 } 1405 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1406 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1407 } 1408 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1409 1410 /* store the column values to the file */ 1411 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1412 if (!rank) { 1413 MPI_Status status; 1414 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1415 for (i=1; i<size; i++) { 1416 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1417 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1418 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1419 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1420 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1421 } 1422 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1423 } else { 1424 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1425 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1426 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1427 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1428 } 1429 ierr = PetscFree(column_values);CHKERRQ(ierr); 1430 1431 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1432 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #include <petscdraw.h> 1437 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1438 { 1439 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1440 PetscErrorCode ierr; 1441 PetscMPIInt rank = aij->rank,size = aij->size; 1442 PetscBool isdraw,iascii,isbinary; 1443 PetscViewer sviewer; 1444 PetscViewerFormat format; 1445 1446 PetscFunctionBegin; 1447 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1448 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1449 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1450 if (iascii) { 1451 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1452 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1453 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1454 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1455 ierr = 
MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1456 for (i=0; i<(PetscInt)size; i++) { 1457 nmax = PetscMax(nmax,nz[i]); 1458 nmin = PetscMin(nmin,nz[i]); 1459 navg += nz[i]; 1460 } 1461 ierr = PetscFree(nz);CHKERRQ(ierr); 1462 navg = navg/size; 1463 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1467 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1468 MatInfo info; 1469 PetscBool inodes; 1470 1471 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1472 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1473 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1475 if (!inodes) { 1476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1477 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1478 } else { 1479 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1480 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1481 } 1482 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1483 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1484 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1485 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1486 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1487 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1488 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1489 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1490 PetscFunctionReturn(0); 1491 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1492 PetscInt inodecount,inodelimit,*inodes; 1493 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1494 if (inodes) { 1495 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1496 } else { 1497 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1498 } 1499 PetscFunctionReturn(0); 1500 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1501 PetscFunctionReturn(0); 1502 } 1503 } else if (isbinary) { 1504 if (size == 1) { 1505 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1506 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1507 } else { 1508 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1509 } 1510 PetscFunctionReturn(0); 1511 } else if (iascii && size == 1) { 1512 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1513 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1514 PetscFunctionReturn(0); 1515 } else if (isdraw) { 1516 PetscDraw draw; 1517 PetscBool isnull; 1518 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1519 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1520 if (isnull) PetscFunctionReturn(0); 1521 } 1522 1523 { /* assemble the entire 
matrix onto first processor */ 1524 Mat A = NULL, Av; 1525 IS isrow,iscol; 1526 1527 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1528 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1529 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1530 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1531 /* The commented code uses MatCreateSubMatrices instead */ 1532 /* 1533 Mat *AA, A = NULL, Av; 1534 IS isrow,iscol; 1535 1536 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1537 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1538 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1539 if (!rank) { 1540 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1541 A = AA[0]; 1542 Av = AA[0]; 1543 } 1544 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1545 */ 1546 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1547 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1548 /* 1549 Everyone has to call to draw the matrix since the graphics waits are 1550 synchronized across all processors that share the PetscDraw object 1551 */ 1552 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1553 if (!rank) { 1554 if (((PetscObject)mat)->name) { 1555 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1556 } 1557 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1558 } 1559 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1560 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1561 ierr = MatDestroy(&A);CHKERRQ(ierr); 1562 } 1563 PetscFunctionReturn(0); 1564 } 1565 1566 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1567 { 1568 PetscErrorCode ierr; 1569 PetscBool iascii,isdraw,issocket,isbinary; 1570 1571 PetscFunctionBegin; 1572 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1573 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1574 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1575 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1576 if (iascii || isdraw || isbinary || issocket) { 1577 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1578 } 1579 PetscFunctionReturn(0); 1580 } 1581 1582 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1583 { 1584 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1585 PetscErrorCode ierr; 1586 Vec bb1 = 0; 1587 PetscBool hasop; 1588 1589 PetscFunctionBegin; 1590 if (flag == SOR_APPLY_UPPER) { 1591 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1592 PetscFunctionReturn(0); 1593 } 1594 1595 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1596 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1597 } 1598 1599 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1600 if (flag & SOR_ZERO_INITIAL_GUESS) { 1601 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1602 its--; 1603 } 1604 1605 while (its--) { 1606 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1607 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1608 1609 /* update rhs: bb1 = bb - B*x */ 1610 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1611 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1612 1613 /* local sweep */ 1614 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1615 } 1616 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1617 if (flag & SOR_ZERO_INITIAL_GUESS) { 1618 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1619 its--; 1620 } 1621 while (its--) { 1622 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1623 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 1625 /* update rhs: bb1 = bb - B*x */ 1626 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1627 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1628 1629 /* local sweep */ 1630 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1631 } 1632 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1633 if (flag & SOR_ZERO_INITIAL_GUESS) { 1634 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1635 its--; 1636 } 1637 while (its--) { 1638 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1639 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 1641 /* update rhs: bb1 = bb - B*x */ 1642 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1643 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1644 1645 /* local sweep */ 1646 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1647 } 1648 } else if (flag & SOR_EISENSTAT) { 1649 Vec xx1; 1650 1651 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1652 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1653 1654 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1655 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1656 if (!mat->diag) { 1657 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1658 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1659 } 1660 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1661 if (hasop) { 1662 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1663 } else { 1664 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1665 } 1666 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1667 1668 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1669 1670 /* local sweep */ 1671 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1672 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1673 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1674 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1675 1676 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1677 1678 matin->factorerrortype = mat->A->factorerrortype; 1679 PetscFunctionReturn(0); 1680 } 1681 1682 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1683 { 1684 Mat aA,aB,Aperm; 1685 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1686 PetscScalar *aa,*ba; 1687 PetscInt 
i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1688 PetscSF rowsf,sf; 1689 IS parcolp = NULL; 1690 PetscBool done; 1691 PetscErrorCode ierr; 1692 1693 PetscFunctionBegin; 1694 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1695 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1696 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1697 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1698 1699 /* Invert row permutation to find out where my rows should go */ 1700 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1701 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1702 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1703 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1704 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1705 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1706 1707 /* Invert column permutation to find out where my columns should go */ 1708 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1709 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1710 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1711 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1712 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1713 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1714 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1715 1716 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1717 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1718 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1719 1720 /* Find out where my gcols should go */ 1721 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1722 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1723 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1724 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1725 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1726 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1727 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1728 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1729 1730 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1731 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1732 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1733 for (i=0; i<m; i++) { 1734 PetscInt row = rdest[i]; 1735 PetscMPIInt rowner; 1736 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1737 for (j=ai[i]; j<ai[i+1]; j++) { 1738 PetscInt col = cdest[aj[j]]; 1739 PetscMPIInt cowner; 1740 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1741 if (rowner == cowner) dnnz[i]++; 1742 else onnz[i]++; 1743 } 1744 for (j=bi[i]; j<bi[i+1]; j++) { 1745 PetscInt col = gcdest[bj[j]]; 1746 PetscMPIInt cowner; 1747 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1748 if (rowner == cowner) dnnz[i]++; 1749 else onnz[i]++; 1750 } 1751 } 1752 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1753 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1754 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1755 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1756 ierr = 
PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1757 1758 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1759 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1760 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1761 for (i=0; i<m; i++) { 1762 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1763 PetscInt j0,rowlen; 1764 rowlen = ai[i+1] - ai[i]; 1765 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1766 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1767 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1768 } 1769 rowlen = bi[i+1] - bi[i]; 1770 for (j0=j=0; j<rowlen; j0=j) { 1771 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1772 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1773 } 1774 } 1775 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1776 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1777 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1778 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1779 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1780 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1781 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1782 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1783 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1784 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1785 *B = Aperm; 1786 PetscFunctionReturn(0); 1787 } 1788 1789 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1790 { 1791 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1792 PetscErrorCode ierr; 1793 1794 PetscFunctionBegin; 1795 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1796 if (ghosts) *ghosts = aij->garray; 1797 PetscFunctionReturn(0); 1798 } 1799 1800 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1801 { 1802 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1803 Mat A = mat->A,B = mat->B; 1804 PetscErrorCode ierr; 1805 PetscLogDouble isend[5],irecv[5]; 1806 1807 PetscFunctionBegin; 1808 info->block_size = 1.0; 1809 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1810 1811 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1812 isend[3] = info->memory; isend[4] = info->mallocs; 1813 1814 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1815 1816 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1817 isend[3] += info->memory; isend[4] += info->mallocs; 1818 if (flag == MAT_LOCAL) { 1819 info->nz_used = isend[0]; 1820 info->nz_allocated = isend[1]; 1821 info->nz_unneeded = isend[2]; 1822 info->memory = isend[3]; 1823 info->mallocs = isend[4]; 1824 } else if (flag == MAT_GLOBAL_MAX) { 1825 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1826 1827 info->nz_used = irecv[0]; 1828 info->nz_allocated = irecv[1]; 1829 info->nz_unneeded = irecv[2]; 1830 info->memory = irecv[3]; 1831 info->mallocs = irecv[4]; 1832 } else if (flag == MAT_GLOBAL_SUM) { 1833 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1834 1835 info->nz_used = irecv[0]; 1836 info->nz_allocated = irecv[1]; 1837 info->nz_unneeded = 
irecv[2]; 1838 info->memory = irecv[3]; 1839 info->mallocs = irecv[4]; 1840 } 1841 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1842 info->fill_ratio_needed = 0; 1843 info->factor_mallocs = 0; 1844 PetscFunctionReturn(0); 1845 } 1846 1847 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1848 { 1849 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1850 PetscErrorCode ierr; 1851 1852 PetscFunctionBegin; 1853 switch (op) { 1854 case MAT_NEW_NONZERO_LOCATIONS: 1855 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1856 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1857 case MAT_KEEP_NONZERO_PATTERN: 1858 case MAT_NEW_NONZERO_LOCATION_ERR: 1859 case MAT_USE_INODES: 1860 case MAT_IGNORE_ZERO_ENTRIES: 1861 MatCheckPreallocated(A,1); 1862 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1863 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1864 break; 1865 case MAT_ROW_ORIENTED: 1866 MatCheckPreallocated(A,1); 1867 a->roworiented = flg; 1868 1869 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1870 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1871 break; 1872 case MAT_NEW_DIAGONALS: 1873 case MAT_SORTED_FULL: 1874 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1875 break; 1876 case MAT_IGNORE_OFF_PROC_ENTRIES: 1877 a->donotstash = flg; 1878 break; 1879 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1880 case MAT_SPD: 1881 case MAT_SYMMETRIC: 1882 case MAT_STRUCTURALLY_SYMMETRIC: 1883 case MAT_HERMITIAN: 1884 case MAT_SYMMETRY_ETERNAL: 1885 break; 1886 case MAT_SUBMAT_SINGLEIS: 1887 A->submat_singleis = flg; 1888 break; 1889 case MAT_STRUCTURE_ONLY: 1890 /* The option is handled directly by MatSetOption() */ 1891 break; 1892 default: 1893 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 1898 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1899 { 1900 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1901 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1902 PetscErrorCode ierr; 1903 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1904 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1905 PetscInt *cmap,*idx_p; 1906 1907 PetscFunctionBegin; 1908 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1909 mat->getrowactive = PETSC_TRUE; 1910 1911 if (!mat->rowvalues && (idx || v)) { 1912 /* 1913 allocate enough space to hold information from the longest row. 
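       The longest local row is found by scanning the CSR row pointers of both the diagonal (A) and
       off-diagonal (B) blocks below; rowvalues and rowindices are sized to that maximum so a single
       allocation can serve every subsequent MatGetRow() call on this matrix.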
1914 */ 1915 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1916 PetscInt max = 1,tmp; 1917 for (i=0; i<matin->rmap->n; i++) { 1918 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1919 if (max < tmp) max = tmp; 1920 } 1921 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1922 } 1923 1924 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1925 lrow = row - rstart; 1926 1927 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1928 if (!v) {pvA = 0; pvB = 0;} 1929 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1930 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1931 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1932 nztot = nzA + nzB; 1933 1934 cmap = mat->garray; 1935 if (v || idx) { 1936 if (nztot) { 1937 /* Sort by increasing column numbers, assuming A and B already sorted */ 1938 PetscInt imark = -1; 1939 if (v) { 1940 *v = v_p = mat->rowvalues; 1941 for (i=0; i<nzB; i++) { 1942 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1943 else break; 1944 } 1945 imark = i; 1946 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1947 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1948 } 1949 if (idx) { 1950 *idx = idx_p = mat->rowindices; 1951 if (imark > -1) { 1952 for (i=0; i<imark; i++) { 1953 idx_p[i] = cmap[cworkB[i]]; 1954 } 1955 } else { 1956 for (i=0; i<nzB; i++) { 1957 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1958 else break; 1959 } 1960 imark = i; 1961 } 1962 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1963 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1964 } 1965 } else { 1966 if (idx) *idx = 0; 1967 if (v) *v = 0; 1968 } 1969 } 1970 *nz = nztot; 1971 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1972 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1973 PetscFunctionReturn(0); 1974 } 1975 1976 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1977 { 1978 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1979 1980 PetscFunctionBegin; 1981 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1982 aij->getrowactive = PETSC_FALSE; 1983 PetscFunctionReturn(0); 1984 } 1985 1986 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1987 { 1988 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1989 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1990 PetscErrorCode ierr; 1991 PetscInt i,j,cstart = mat->cmap->rstart; 1992 PetscReal sum = 0.0; 1993 MatScalar *v; 1994 1995 PetscFunctionBegin; 1996 if (aij->size == 1) { 1997 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1998 } else { 1999 if (type == NORM_FROBENIUS) { 2000 v = amat->a; 2001 for (i=0; i<amat->nz; i++) { 2002 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2003 } 2004 v = bmat->a; 2005 for (i=0; i<bmat->nz; i++) { 2006 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2007 } 2008 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2009 *norm = PetscSqrtReal(*norm); 2010 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 2011 } else if (type == NORM_1) { /* max column norm */ 2012 PetscReal *tmp,*tmp2; 2013 PetscInt *jj,*garray = aij->garray; 2014 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 2015 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 2016 *norm = 0.0; 
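      /* accumulate |a_ij| into per-column sums: columns of the diagonal block A are shifted by cstart into
         the global numbering, columns of the off-diagonal block B are translated through garray; the sums
         are then added across all processes and the largest entry gives the 1-norm */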
2017 v = amat->a; jj = amat->j; 2018 for (j=0; j<amat->nz; j++) { 2019 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2020 } 2021 v = bmat->a; jj = bmat->j; 2022 for (j=0; j<bmat->nz; j++) { 2023 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2024 } 2025 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2026 for (j=0; j<mat->cmap->N; j++) { 2027 if (tmp2[j] > *norm) *norm = tmp2[j]; 2028 } 2029 ierr = PetscFree(tmp);CHKERRQ(ierr); 2030 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2031 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2032 } else if (type == NORM_INFINITY) { /* max row norm */ 2033 PetscReal ntemp = 0.0; 2034 for (j=0; j<aij->A->rmap->n; j++) { 2035 v = amat->a + amat->i[j]; 2036 sum = 0.0; 2037 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2038 sum += PetscAbsScalar(*v); v++; 2039 } 2040 v = bmat->a + bmat->i[j]; 2041 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2042 sum += PetscAbsScalar(*v); v++; 2043 } 2044 if (sum > ntemp) ntemp = sum; 2045 } 2046 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2047 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2048 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2049 } 2050 PetscFunctionReturn(0); 2051 } 2052 2053 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2054 { 2055 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2056 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2057 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2058 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2059 PetscErrorCode ierr; 2060 Mat B,A_diag,*B_diag; 2061 const MatScalar *array; 2062 2063 PetscFunctionBegin; 2064 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2065 ai = Aloc->i; aj = Aloc->j; 2066 bi = Bloc->i; bj = Bloc->j; 2067 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2068 PetscInt *d_nnz,*g_nnz,*o_nnz; 2069 PetscSFNode *oloc; 2070 PETSC_UNUSED PetscSF sf; 2071 2072 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2073 /* compute d_nnz for preallocation */ 2074 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2075 for (i=0; i<ai[ma]; i++) { 2076 d_nnz[aj[i]]++; 2077 } 2078 /* compute local off-diagonal contributions */ 2079 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2080 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2081 /* map those to global */ 2082 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2083 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2084 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2085 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2086 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2087 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2088 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2089 2090 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2091 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2092 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2093 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2094 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2095 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2096 } else { 2097 B = *matout; 2098 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2099 } 2100 2101 b = (Mat_MPIAIJ*)B->data; 2102 A_diag = a->A; 2103 B_diag = &b->A; 2104 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2105 A_diag_ncol = A_diag->cmap->N; 2106 B_diag_ilen = sub_B_diag->ilen; 2107 B_diag_i = sub_B_diag->i; 2108 2109 /* Set ilen for diagonal of B */ 2110 for (i=0; i<A_diag_ncol; i++) { 2111 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2112 } 2113 2114 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2115 very quickly (=without using MatSetValues), because all writes are local. */ 2116 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2117 2118 /* copy over the B part */ 2119 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2120 array = Bloc->a; 2121 row = A->rmap->rstart; 2122 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2123 cols_tmp = cols; 2124 for (i=0; i<mb; i++) { 2125 ncol = bi[i+1]-bi[i]; 2126 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2127 row++; 2128 array += ncol; cols_tmp += ncol; 2129 } 2130 ierr = PetscFree(cols);CHKERRQ(ierr); 2131 2132 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2133 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2134 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2135 *matout = B; 2136 } else { 2137 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2138 } 2139 PetscFunctionReturn(0); 2140 } 2141 2142 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2143 { 2144 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2145 Mat a = aij->A,b = aij->B; 2146 PetscErrorCode ierr; 2147 PetscInt s1,s2,s3; 2148 2149 PetscFunctionBegin; 2150 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2151 if (rr) { 2152 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2153 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2154 /* Overlap communication with computation. 
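       The forward scatter of rr into lvec is only started here; it is completed after the left scaling of the
       off-diagonal block and the scaling of the diagonal block below, so the off-process entries of rr arrive
       while the local work proceeds.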
*/ 2155 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2156 } 2157 if (ll) { 2158 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2159 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2160 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2161 } 2162 /* scale the diagonal block */ 2163 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2164 2165 if (rr) { 2166 /* Do a scatter end and then right scale the off-diagonal block */ 2167 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2168 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2169 } 2170 PetscFunctionReturn(0); 2171 } 2172 2173 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2174 { 2175 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2176 PetscErrorCode ierr; 2177 2178 PetscFunctionBegin; 2179 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2180 PetscFunctionReturn(0); 2181 } 2182 2183 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2184 { 2185 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2186 Mat a,b,c,d; 2187 PetscBool flg; 2188 PetscErrorCode ierr; 2189 2190 PetscFunctionBegin; 2191 a = matA->A; b = matA->B; 2192 c = matB->A; d = matB->B; 2193 2194 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2195 if (flg) { 2196 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2197 } 2198 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2199 PetscFunctionReturn(0); 2200 } 2201 2202 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2203 { 2204 PetscErrorCode ierr; 2205 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2206 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2207 2208 PetscFunctionBegin; 2209 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2210 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2211 /* because of the column compression in the off-processor part of the matrix a->B, 2212 the number of columns in a->B and b->B may be different, hence we cannot call 2213 the MatCopy() directly on the two parts. If need be, we can provide a more 2214 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2215 then copying the submatrices */ 2216 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2217 } else { 2218 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2219 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2220 } 2221 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2226 { 2227 PetscErrorCode ierr; 2228 2229 PetscFunctionBegin; 2230 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 /* 2235 Computes the number of nonzeros per row needed for preallocation when X and Y 2236 have different nonzero structure. 
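   For each row the two sorted column lists, mapped to global indices through xltog and yltog, are merged and
   every column appearing in either X or Y is counted once. For example, if a row of X has global columns
   {1,4,7} and the same row of Y has {2,4}, the union is {1,2,4,7} and the resulting count for that row is 4.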
2237 */ 2238 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2239 { 2240 PetscInt i,j,k,nzx,nzy; 2241 2242 PetscFunctionBegin; 2243 /* Set the number of nonzeros in the new matrix */ 2244 for (i=0; i<m; i++) { 2245 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2246 nzx = xi[i+1] - xi[i]; 2247 nzy = yi[i+1] - yi[i]; 2248 nnz[i] = 0; 2249 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2250 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2251 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2252 nnz[i]++; 2253 } 2254 for (; k<nzy; k++) nnz[i]++; 2255 } 2256 PetscFunctionReturn(0); 2257 } 2258 2259 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2260 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2261 { 2262 PetscErrorCode ierr; 2263 PetscInt m = Y->rmap->N; 2264 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2265 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2266 2267 PetscFunctionBegin; 2268 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2273 { 2274 PetscErrorCode ierr; 2275 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2276 PetscBLASInt bnz,one=1; 2277 Mat_SeqAIJ *x,*y; 2278 2279 PetscFunctionBegin; 2280 if (str == SAME_NONZERO_PATTERN) { 2281 PetscScalar alpha = a; 2282 x = (Mat_SeqAIJ*)xx->A->data; 2283 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2284 y = (Mat_SeqAIJ*)yy->A->data; 2285 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2286 x = (Mat_SeqAIJ*)xx->B->data; 2287 y = (Mat_SeqAIJ*)yy->B->data; 2288 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2289 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2290 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2291 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2292 will be updated */ 2293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2294 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2295 Y->offloadmask = PETSC_OFFLOAD_CPU; 2296 } 2297 #endif 2298 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2299 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2300 } else { 2301 Mat B; 2302 PetscInt *nnz_d,*nnz_o; 2303 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2304 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2305 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2306 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2307 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2308 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2309 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2310 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2311 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2312 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2313 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2314 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2315 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
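    /* MatHeaderReplace() above has folded the freshly preallocated B into Y, so only the temporary per-row
       nonzero counts remain to be cleaned up */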
2316 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2317 } 2318 PetscFunctionReturn(0); 2319 } 2320 2321 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2322 2323 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2324 { 2325 #if defined(PETSC_USE_COMPLEX) 2326 PetscErrorCode ierr; 2327 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2328 2329 PetscFunctionBegin; 2330 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2331 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2332 #else 2333 PetscFunctionBegin; 2334 #endif 2335 PetscFunctionReturn(0); 2336 } 2337 2338 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2339 { 2340 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2341 PetscErrorCode ierr; 2342 2343 PetscFunctionBegin; 2344 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2345 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2346 PetscFunctionReturn(0); 2347 } 2348 2349 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2350 { 2351 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2352 PetscErrorCode ierr; 2353 2354 PetscFunctionBegin; 2355 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2356 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2357 PetscFunctionReturn(0); 2358 } 2359 2360 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2361 { 2362 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2363 PetscErrorCode ierr; 2364 PetscInt i,*idxb = 0; 2365 PetscScalar *va,*vb; 2366 Vec vtmp; 2367 2368 PetscFunctionBegin; 2369 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2370 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2371 if (idx) { 2372 for (i=0; i<A->rmap->n; i++) { 2373 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2374 } 2375 } 2376 2377 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2378 if (idx) { 2379 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2380 } 2381 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2382 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2383 2384 for (i=0; i<A->rmap->n; i++) { 2385 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2386 va[i] = vb[i]; 2387 if (idx) idx[i] = a->garray[idxb[i]]; 2388 } 2389 } 2390 2391 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2392 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2393 ierr = PetscFree(idxb);CHKERRQ(ierr); 2394 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2395 PetscFunctionReturn(0); 2396 } 2397 2398 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2399 { 2400 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2401 PetscErrorCode ierr; 2402 PetscInt i,*idxb = 0; 2403 PetscScalar *va,*vb; 2404 Vec vtmp; 2405 2406 PetscFunctionBegin; 2407 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2408 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2409 if (idx) { 2410 for (i=0; i<A->rmap->n; i++) { 2411 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2412 } 2413 } 2414 2415 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2416 if (idx) { 2417 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2418 } 2419 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2420 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2421 2422 for (i=0; i<A->rmap->n; i++) { 2423 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2424 va[i] = vb[i]; 2425 if (idx) idx[i] = a->garray[idxb[i]]; 2426 } 2427 } 2428 2429 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2430 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2431 ierr = PetscFree(idxb);CHKERRQ(ierr); 2432 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2437 { 2438 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)
A->data; 2439 PetscInt n = A->rmap->n; 2440 PetscInt cstart = A->cmap->rstart; 2441 PetscInt *cmap = mat->garray; 2442 PetscInt *diagIdx, *offdiagIdx; 2443 Vec diagV, offdiagV; 2444 PetscScalar *a, *diagA, *offdiagA; 2445 PetscInt r; 2446 PetscErrorCode ierr; 2447 2448 PetscFunctionBegin; 2449 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2450 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2451 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2452 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2453 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2454 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2455 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2456 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2457 for (r = 0; r < n; ++r) { 2458 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2459 a[r] = diagA[r]; 2460 idx[r] = cstart + diagIdx[r]; 2461 } else { 2462 a[r] = offdiagA[r]; 2463 idx[r] = cmap[offdiagIdx[r]]; 2464 } 2465 } 2466 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2467 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2468 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2469 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2470 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2471 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2476 { 2477 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2478 PetscInt n = A->rmap->n; 2479 PetscInt cstart = A->cmap->rstart; 2480 PetscInt *cmap = mat->garray; 2481 PetscInt *diagIdx, *offdiagIdx; 2482 Vec diagV, offdiagV; 2483 PetscScalar *a, *diagA, *offdiagA; 2484 PetscInt r; 2485 PetscErrorCode ierr; 2486 2487 PetscFunctionBegin; 2488 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2489 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2490 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2491 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2492 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2493 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2494 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2495 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2496 for (r = 0; r < n; ++r) { 2497 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2498 a[r] = diagA[r]; 2499 idx[r] = cstart + diagIdx[r]; 2500 } else { 2501 a[r] = offdiagA[r]; 2502 idx[r] = cmap[offdiagIdx[r]]; 2503 } 2504 } 2505 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2509 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2510 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2511 PetscFunctionReturn(0); 2512 } 2513 2514 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2515 { 2516 PetscErrorCode ierr; 2517 Mat *dummy; 2518 2519 PetscFunctionBegin; 2520 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2521 *newmat = *dummy; 2522 ierr = PetscFree(dummy);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2527 { 2528 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2529 PetscErrorCode ierr; 2530 2531 PetscFunctionBegin; 2532 ierr =
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2533 A->factorerrortype = a->A->factorerrortype; 2534 PetscFunctionReturn(0); 2535 } 2536 2537 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2538 { 2539 PetscErrorCode ierr; 2540 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2541 2542 PetscFunctionBegin; 2543 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2544 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2545 if (x->assembled) { 2546 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2547 } else { 2548 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2549 } 2550 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2551 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2552 PetscFunctionReturn(0); 2553 } 2554 2555 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2556 { 2557 PetscFunctionBegin; 2558 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2559 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2560 PetscFunctionReturn(0); 2561 } 2562 2563 /*@ 2564 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2565 2566 Collective on Mat 2567 2568 Input Parameters: 2569 + A - the matrix 2570 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2571 2572 Level: advanced 2573 2574 @*/ 2575 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2576 { 2577 PetscErrorCode ierr; 2578 2579 PetscFunctionBegin; 2580 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2581 PetscFunctionReturn(0); 2582 } 2583 2584 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2585 { 2586 PetscErrorCode ierr; 2587 PetscBool sc = PETSC_FALSE,flg; 2588 2589 PetscFunctionBegin; 2590 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2591 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2592 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2593 if (flg) { 2594 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2595 } 2596 ierr = PetscOptionsTail();CHKERRQ(ierr); 2597 PetscFunctionReturn(0); 2598 } 2599 2600 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2601 { 2602 PetscErrorCode ierr; 2603 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2604 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2605 2606 PetscFunctionBegin; 2607 if (!Y->preallocated) { 2608 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2609 } else if (!aij->nz) { 2610 PetscInt nonew = aij->nonew; 2611 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2612 aij->nonew = nonew; 2613 } 2614 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2615 PetscFunctionReturn(0); 2616 } 2617 2618 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2619 { 2620 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2621 PetscErrorCode ierr; 2622 2623 PetscFunctionBegin; 2624 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2625 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2626 if (d) { 2627 PetscInt rstart; 
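    /* MatMissingDiagonal() on the diagonal block reports a local row index; shift it by the ownership range
       start so the caller receives a global row index */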
2628 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2629 *d += rstart; 2630 2631 } 2632 PetscFunctionReturn(0); 2633 } 2634 2635 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2636 { 2637 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2638 PetscErrorCode ierr; 2639 2640 PetscFunctionBegin; 2641 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2642 PetscFunctionReturn(0); 2643 } 2644 2645 /* -------------------------------------------------------------------*/ 2646 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2647 MatGetRow_MPIAIJ, 2648 MatRestoreRow_MPIAIJ, 2649 MatMult_MPIAIJ, 2650 /* 4*/ MatMultAdd_MPIAIJ, 2651 MatMultTranspose_MPIAIJ, 2652 MatMultTransposeAdd_MPIAIJ, 2653 0, 2654 0, 2655 0, 2656 /*10*/ 0, 2657 0, 2658 0, 2659 MatSOR_MPIAIJ, 2660 MatTranspose_MPIAIJ, 2661 /*15*/ MatGetInfo_MPIAIJ, 2662 MatEqual_MPIAIJ, 2663 MatGetDiagonal_MPIAIJ, 2664 MatDiagonalScale_MPIAIJ, 2665 MatNorm_MPIAIJ, 2666 /*20*/ MatAssemblyBegin_MPIAIJ, 2667 MatAssemblyEnd_MPIAIJ, 2668 MatSetOption_MPIAIJ, 2669 MatZeroEntries_MPIAIJ, 2670 /*24*/ MatZeroRows_MPIAIJ, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /*29*/ MatSetUp_MPIAIJ, 2676 0, 2677 0, 2678 MatGetDiagonalBlock_MPIAIJ, 2679 0, 2680 /*34*/ MatDuplicate_MPIAIJ, 2681 0, 2682 0, 2683 0, 2684 0, 2685 /*39*/ MatAXPY_MPIAIJ, 2686 MatCreateSubMatrices_MPIAIJ, 2687 MatIncreaseOverlap_MPIAIJ, 2688 MatGetValues_MPIAIJ, 2689 MatCopy_MPIAIJ, 2690 /*44*/ MatGetRowMax_MPIAIJ, 2691 MatScale_MPIAIJ, 2692 MatShift_MPIAIJ, 2693 MatDiagonalSet_MPIAIJ, 2694 MatZeroRowsColumns_MPIAIJ, 2695 /*49*/ MatSetRandom_MPIAIJ, 2696 0, 2697 0, 2698 0, 2699 0, 2700 /*54*/ MatFDColoringCreate_MPIXAIJ, 2701 0, 2702 MatSetUnfactored_MPIAIJ, 2703 MatPermute_MPIAIJ, 2704 0, 2705 /*59*/ MatCreateSubMatrix_MPIAIJ, 2706 MatDestroy_MPIAIJ, 2707 MatView_MPIAIJ, 2708 0, 2709 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2710 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2711 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2712 0, 2713 0, 2714 0, 2715 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2716 MatGetRowMinAbs_MPIAIJ, 2717 0, 2718 0, 2719 0, 2720 0, 2721 /*75*/ MatFDColoringApply_AIJ, 2722 MatSetFromOptions_MPIAIJ, 2723 0, 2724 0, 2725 MatFindZeroDiagonals_MPIAIJ, 2726 /*80*/ 0, 2727 0, 2728 0, 2729 /*83*/ MatLoad_MPIAIJ, 2730 MatIsSymmetric_MPIAIJ, 2731 0, 2732 0, 2733 0, 2734 0, 2735 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2736 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2737 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2738 MatPtAP_MPIAIJ_MPIAIJ, 2739 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2740 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2741 0, 2742 0, 2743 0, 2744 MatBindToCPU_MPIAIJ, 2745 /*99*/ 0, 2746 0, 2747 0, 2748 MatConjugate_MPIAIJ, 2749 0, 2750 /*104*/MatSetValuesRow_MPIAIJ, 2751 MatRealPart_MPIAIJ, 2752 MatImaginaryPart_MPIAIJ, 2753 0, 2754 0, 2755 /*109*/0, 2756 0, 2757 MatGetRowMin_MPIAIJ, 2758 0, 2759 MatMissingDiagonal_MPIAIJ, 2760 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2761 0, 2762 MatGetGhosts_MPIAIJ, 2763 0, 2764 0, 2765 /*119*/0, 2766 0, 2767 0, 2768 0, 2769 MatGetMultiProcBlock_MPIAIJ, 2770 /*124*/MatFindNonzeroRows_MPIAIJ, 2771 MatGetColumnNorms_MPIAIJ, 2772 MatInvertBlockDiagonal_MPIAIJ, 2773 MatInvertVariableBlockDiagonal_MPIAIJ, 2774 MatCreateSubMatricesMPI_MPIAIJ, 2775 /*129*/0, 2776 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2777 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2778 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2779 0, 2780 /*134*/0, 2781 0, 2782 MatRARt_MPIAIJ_MPIAIJ, 2783 0, 2784 0, 2785 
/*139*/MatSetBlockSizes_MPIAIJ, 2786 0, 2787 0, 2788 MatFDColoringSetUp_MPIXAIJ, 2789 MatFindOffBlockDiagonalEntries_MPIAIJ, 2790 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2791 }; 2792 2793 /* ----------------------------------------------------------------------------------------*/ 2794 2795 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2796 { 2797 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2798 PetscErrorCode ierr; 2799 2800 PetscFunctionBegin; 2801 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2802 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2803 PetscFunctionReturn(0); 2804 } 2805 2806 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2807 { 2808 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2809 PetscErrorCode ierr; 2810 2811 PetscFunctionBegin; 2812 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2813 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2814 PetscFunctionReturn(0); 2815 } 2816 2817 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2818 { 2819 Mat_MPIAIJ *b; 2820 PetscErrorCode ierr; 2821 PetscMPIInt size; 2822 2823 PetscFunctionBegin; 2824 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2825 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2826 b = (Mat_MPIAIJ*)B->data; 2827 2828 #if defined(PETSC_USE_CTABLE) 2829 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2830 #else 2831 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2832 #endif 2833 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2834 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2835 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2836 2837 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2838 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2839 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2840 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2841 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2842 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2843 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2844 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2845 2846 if (!B->preallocated) { 2847 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2848 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2849 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2850 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2851 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2852 } 2853 2854 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2855 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2856 B->preallocated = PETSC_TRUE; 2857 B->was_assembled = PETSC_FALSE; 2858 B->assembled = PETSC_FALSE; 2859 PetscFunctionReturn(0); 2860 } 2861 2862 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2863 { 2864 Mat_MPIAIJ *b; 2865 PetscErrorCode ierr; 2866 2867 PetscFunctionBegin; 2868 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2869 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2870 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2871 b = (Mat_MPIAIJ*)B->data; 2872 2873 #if defined(PETSC_USE_CTABLE) 2874 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2875 #else 2876 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2877 #endif 2878 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2879 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2880 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2881 2882 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2883 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2884 B->preallocated = PETSC_TRUE; 2885 B->was_assembled = PETSC_FALSE; 2886 B->assembled = PETSC_FALSE; 2887 PetscFunctionReturn(0); 2888 } 2889 2890 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2891 { 2892 Mat mat; 2893 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2894 PetscErrorCode ierr; 2895 2896 PetscFunctionBegin; 2897 *newmat = 0; 2898 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2899 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2900 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2901 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2902 a = (Mat_MPIAIJ*)mat->data; 2903 2904 mat->factortype = matin->factortype; 2905 mat->assembled = PETSC_TRUE; 2906 mat->insertmode = NOT_SET_VALUES; 2907 mat->preallocated = PETSC_TRUE; 2908 2909 a->size = oldmat->size; 2910 a->rank = oldmat->rank; 2911 a->donotstash = oldmat->donotstash; 2912 a->roworiented = oldmat->roworiented; 2913 a->rowindices = 0; 2914 a->rowvalues = 0; 2915 a->getrowactive = PETSC_FALSE; 2916 2917 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2918 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2919 2920 if (oldmat->colmap) { 2921 #if defined(PETSC_USE_CTABLE) 2922 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2923 #else 2924 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2925 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2926 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2927 #endif 2928 } else a->colmap = 0; 2929 if (oldmat->garray) { 2930 PetscInt len; 2931 len = oldmat->B->cmap->n; 2932 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2933 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2934 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2935 } else a->garray = 0; 2936 2937 /* It may happen MatDuplicate is called with a non-assembled matrix 2938 In fact, MatDuplicate only requires the matrix to be preallocated 2939 This may happen inside a DMCreateMatrix_Shell */ 2940 if (oldmat->lvec) { 2941 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2942 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2943 } 2944 if (oldmat->Mvctx) { 2945 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2946 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2947 } 2948 if (oldmat->Mvctx_mpi1) { 2949 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2950 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2951 } 2952 2953 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2954 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2955 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2956 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2957 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2958 *newmat = mat; 2959 PetscFunctionReturn(0); 2960 } 2961 2962 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2963 { 2964 PetscBool isbinary, ishdf5; 2965 PetscErrorCode ierr; 2966 2967 PetscFunctionBegin; 2968 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2969 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2970 /* force binary viewer to load .info file if it has not yet done so */ 2971 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2972 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2973 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2974 if (isbinary) { 2975 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2976 } else if (ishdf5) { 2977 #if defined(PETSC_HAVE_HDF5) 2978 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2979 #else 2980 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2981 #endif 2982 } else { 2983 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2984 } 2985 PetscFunctionReturn(0); 2986 } 2987 2988 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2989 { 2990 PetscScalar *vals,*svals; 2991 MPI_Comm comm; 2992 PetscErrorCode ierr; 2993 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2994 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2995 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2996 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2997 PetscInt cend,cstart,n,*rowners; 2998 int fd; 2999 PetscInt bs = newMat->rmap->bs; 3000 3001 PetscFunctionBegin; 3002 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3003 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3004 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3005 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3006 if (!rank) { 3007 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3008 if (header[0] 
!= MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3009 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 3010 } 3011 3012 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 3013 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3014 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3015 if (bs < 0) bs = 1; 3016 3017 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3018 M = header[1]; N = header[2]; 3019 3020 /* If global sizes are set, check if they are consistent with that given in the file */ 3021 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 3022 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 3023 3024 /* determine ownership of all (block) rows */ 3025 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3026 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3027 else m = newMat->rmap->n; /* Set by user */ 3028 3029 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 3030 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3031 3032 /* First process needs enough room for process with most rows */ 3033 if (!rank) { 3034 mmax = rowners[1]; 3035 for (i=2; i<=size; i++) { 3036 mmax = PetscMax(mmax, rowners[i]); 3037 } 3038 } else mmax = -1; /* unused, but compilers complain */ 3039 3040 rowners[0] = 0; 3041 for (i=2; i<=size; i++) { 3042 rowners[i] += rowners[i-1]; 3043 } 3044 rstart = rowners[rank]; 3045 rend = rowners[rank+1]; 3046 3047 /* distribute row lengths to all processors */ 3048 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3049 if (!rank) { 3050 ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr); 3051 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3052 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3053 for (j=0; j<m; j++) { 3054 procsnz[0] += ourlens[j]; 3055 } 3056 for (i=1; i<size; i++) { 3057 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr); 3058 /* calculate the number of nonzeros on each processor */ 3059 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3060 procsnz[i] += rowlengths[j]; 3061 } 3062 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3063 } 3064 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3065 } else { 3066 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3067 } 3068 3069 if (!rank) { 3070 /* determine max buffer needed and allocate it */ 3071 maxnz = 0; 3072 for (i=0; i<size; i++) { 3073 maxnz = PetscMax(maxnz,procsnz[i]); 3074 } 3075 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3076 3077 /* read in my part of the matrix column indices */ 3078 nz = procsnz[0]; 3079 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3080 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3081 3082 /* read in every one elses and ship off */ 3083 for (i=1; i<size; i++) { 3084 nz = procsnz[i]; 3085 ierr = 
PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3086 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3087 } 3088 ierr = PetscFree(cols);CHKERRQ(ierr); 3089 } else { 3090 /* determine buffer space needed for message */ 3091 nz = 0; 3092 for (i=0; i<m; i++) { 3093 nz += ourlens[i]; 3094 } 3095 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3096 3097 /* receive message of column indices*/ 3098 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3099 } 3100 3101 /* determine column ownership if matrix is not square */ 3102 if (N != M) { 3103 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3104 else n = newMat->cmap->n; 3105 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3106 cstart = cend - n; 3107 } else { 3108 cstart = rstart; 3109 cend = rend; 3110 n = cend - cstart; 3111 } 3112 3113 /* loop over local rows, determining number of off diagonal entries */ 3114 ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr); 3115 jj = 0; 3116 for (i=0; i<m; i++) { 3117 for (j=0; j<ourlens[i]; j++) { 3118 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3119 jj++; 3120 } 3121 } 3122 3123 for (i=0; i<m; i++) { 3124 ourlens[i] -= offlens[i]; 3125 } 3126 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3127 3128 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3129 3130 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3131 3132 for (i=0; i<m; i++) { 3133 ourlens[i] += offlens[i]; 3134 } 3135 3136 if (!rank) { 3137 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3138 3139 /* read in my part of the matrix numerical values */ 3140 nz = procsnz[0]; 3141 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3142 3143 /* insert into matrix */ 3144 jj = rstart; 3145 smycols = mycols; 3146 svals = vals; 3147 for (i=0; i<m; i++) { 3148 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3149 smycols += ourlens[i]; 3150 svals += ourlens[i]; 3151 jj++; 3152 } 3153 3154 /* read in other processors and ship out */ 3155 for (i=1; i<size; i++) { 3156 nz = procsnz[i]; 3157 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3158 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3159 } 3160 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3161 } else { 3162 /* receive numeric values */ 3163 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3164 3165 /* receive message of values*/ 3166 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3167 3168 /* insert into matrix */ 3169 jj = rstart; 3170 smycols = mycols; 3171 svals = vals; 3172 for (i=0; i<m; i++) { 3173 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3174 smycols += ourlens[i]; 3175 svals += ourlens[i]; 3176 jj++; 3177 } 3178 } 3179 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3180 ierr = PetscFree(vals);CHKERRQ(ierr); 3181 ierr = PetscFree(mycols);CHKERRQ(ierr); 3182 ierr = PetscFree(rowners);CHKERRQ(ierr); 3183 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3184 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3185 PetscFunctionReturn(0); 3186 } 3187 3188 /* Not scalable because of ISAllGather() unless getting all columns. 
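   ISAllGather() leaves every process holding a copy of the complete column index set, so the memory use grows with the global number of selected columns.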
*/ 3189 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3190 { 3191 PetscErrorCode ierr; 3192 IS iscol_local; 3193 PetscBool isstride; 3194 PetscMPIInt lisstride=0,gisstride; 3195 3196 PetscFunctionBegin; 3197 /* check if we are grabbing all columns*/ 3198 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3199 3200 if (isstride) { 3201 PetscInt start,len,mstart,mlen; 3202 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3203 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3204 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3205 if (mstart == start && mlen-mstart == len) lisstride = 1; 3206 } 3207 3208 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3209 if (gisstride) { 3210 PetscInt N; 3211 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3212 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3213 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3214 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3215 } else { 3216 PetscInt cbs; 3217 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3218 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3219 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3220 } 3221 3222 *isseq = iscol_local; 3223 PetscFunctionReturn(0); 3224 } 3225 3226 /* 3227 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3228 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3229 3230 Input Parameters: 3231 mat - matrix 3232 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3233 i.e., mat->rstart <= isrow[i] < mat->rend 3234 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3235 i.e., mat->cstart <= iscol[i] < mat->cend 3236 Output Parameter: 3237 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3238 iscol_o - sequential column index set for retrieving mat->B 3239 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3240 */ 3241 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3242 { 3243 PetscErrorCode ierr; 3244 Vec x,cmap; 3245 const PetscInt *is_idx; 3246 PetscScalar *xarray,*cmaparray; 3247 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3248 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3249 Mat B=a->B; 3250 Vec lvec=a->lvec,lcmap; 3251 PetscInt i,cstart,cend,Bn=B->cmap->N; 3252 MPI_Comm comm; 3253 VecScatter Mvctx=a->Mvctx; 3254 3255 PetscFunctionBegin; 3256 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3257 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3258 3259 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3260 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3261 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3262 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3263 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3264 3265 /* Get start indices */ 3266 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3267 isstart -= ncols; 3268 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3269 3270 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3271 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3272 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3273 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3274 for (i=0; i<ncols; i++) { 3275 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3276 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3277 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3278 } 3279 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3280 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3281 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3282 3283 /* Get iscol_d */ 3284 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3285 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3286 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3287 3288 /* Get isrow_d */ 3289 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3290 rstart = mat->rmap->rstart; 3291 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3292 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3293 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3294 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3295 3296 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3297 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3298 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3299 3300 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3301 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3302 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3303 3304 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3305 3306 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3307 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3308 3309 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3310 /* off-process column indices */ 3311 count = 0; 3312 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3313 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3314 3315 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3316 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3317 for (i=0; i<Bn; i++) { 3318 if (PetscRealPart(xarray[i]) > -1.0) { 3319 idx[count] = i; /* local column index in off-diagonal part B */ 3320 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3321 count++; 3322 } 3323 } 3324 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3325 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3326 3327 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3328 /* cannot ensure iscol_o has same blocksize as iscol! 
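   (iscol_o selects an arbitrary subset of the off-process columns of B, which need not respect the block structure of iscol)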
*/ 3329 3330 ierr = PetscFree(idx);CHKERRQ(ierr); 3331 *garray = cmap1; 3332 3333 ierr = VecDestroy(&x);CHKERRQ(ierr); 3334 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3335 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3336 PetscFunctionReturn(0); 3337 } 3338 3339 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3340 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3341 { 3342 PetscErrorCode ierr; 3343 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3344 Mat M = NULL; 3345 MPI_Comm comm; 3346 IS iscol_d,isrow_d,iscol_o; 3347 Mat Asub = NULL,Bsub = NULL; 3348 PetscInt n; 3349 3350 PetscFunctionBegin; 3351 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3352 3353 if (call == MAT_REUSE_MATRIX) { 3354 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3355 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3356 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3357 3358 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3359 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3360 3361 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3362 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3363 3364 /* Update diagonal and off-diagonal portions of submat */ 3365 asub = (Mat_MPIAIJ*)(*submat)->data; 3366 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3367 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3368 if (n) { 3369 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3370 } 3371 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3372 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3373 3374 } else { /* call == MAT_INITIAL_MATRIX) */ 3375 const PetscInt *garray; 3376 PetscInt BsubN; 3377 3378 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3379 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3380 3381 /* Create local submatrices Asub and Bsub */ 3382 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3383 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3384 3385 /* Create submatrix M */ 3386 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3387 3388 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3389 asub = (Mat_MPIAIJ*)M->data; 3390 3391 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3392 n = asub->B->cmap->N; 3393 if (BsubN > n) { 3394 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3395 const PetscInt *idx; 3396 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3397 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3398 3399 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3400 j = 0; 3401 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3402 for (i=0; i<n; i++) { 3403 if (j >= BsubN) break; 3404 while (subgarray[i] > garray[j]) j++; 3405 3406 if (subgarray[i] == garray[j]) { 3407 idx_new[i] = idx[j++]; 3408 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3409 } 3410 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3411 3412 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3413 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3414 3415 } else if (BsubN < n) { 3416 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3417 } 3418 3419 ierr = PetscFree(garray);CHKERRQ(ierr); 3420 *submat = M; 3421 3422 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3423 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3424 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3425 3426 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3427 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3428 3429 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3430 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3431 } 3432 PetscFunctionReturn(0); 3433 } 3434 3435 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3436 { 3437 PetscErrorCode ierr; 3438 IS iscol_local=NULL,isrow_d; 3439 PetscInt csize; 3440 PetscInt n,i,j,start,end; 3441 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3442 MPI_Comm comm; 3443 3444 PetscFunctionBegin; 3445 /* If isrow has same processor distribution as mat, 3446 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3447 if (call == MAT_REUSE_MATRIX) { 3448 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3449 if (isrow_d) { 3450 sameRowDist = PETSC_TRUE; 3451 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3452 } else { 3453 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3454 if (iscol_local) { 3455 sameRowDist = PETSC_TRUE; 3456 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3457 } 3458 } 3459 } else { 3460 /* Check if isrow has same processor distribution as mat */ 3461 sameDist[0] 
= PETSC_FALSE; 3462 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3463 if (!n) { 3464 sameDist[0] = PETSC_TRUE; 3465 } else { 3466 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3467 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3468 if (i >= start && j < end) { 3469 sameDist[0] = PETSC_TRUE; 3470 } 3471 } 3472 3473 /* Check if iscol has same processor distribution as mat */ 3474 sameDist[1] = PETSC_FALSE; 3475 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3476 if (!n) { 3477 sameDist[1] = PETSC_TRUE; 3478 } else { 3479 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3480 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3481 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3482 } 3483 3484 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3485 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3486 sameRowDist = tsameDist[0]; 3487 } 3488 3489 if (sameRowDist) { 3490 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3491 /* isrow and iscol have same processor distribution as mat */ 3492 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3493 PetscFunctionReturn(0); 3494 } else { /* sameRowDist */ 3495 /* isrow has same processor distribution as mat */ 3496 if (call == MAT_INITIAL_MATRIX) { 3497 PetscBool sorted; 3498 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3499 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3500 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3501 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3502 3503 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3504 if (sorted) { 3505 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3506 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3507 PetscFunctionReturn(0); 3508 } 3509 } else { /* call == MAT_REUSE_MATRIX */ 3510 IS iscol_sub; 3511 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3512 if (iscol_sub) { 3513 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3514 PetscFunctionReturn(0); 3515 } 3516 } 3517 } 3518 } 3519 3520 /* General case: iscol -> iscol_local which has global size of iscol */ 3521 if (call == MAT_REUSE_MATRIX) { 3522 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3523 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3524 } else { 3525 if (!iscol_local) { 3526 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3527 } 3528 } 3529 3530 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3531 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3532 3533 if (call == MAT_INITIAL_MATRIX) { 3534 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3535 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3536 } 3537 PetscFunctionReturn(0); 3538 } 3539 3540 /*@C 3541 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3542 and "off-diagonal" part of the matrix in CSR format. 3543 3544 Collective 3545 3546 Input Parameters: 3547 + comm - MPI communicator 3548 . 
A - "diagonal" portion of matrix 3549 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3550 - garray - global index of B columns 3551 3552 Output Parameter: 3553 . mat - the matrix, with input A as its local diagonal matrix 3554 Level: advanced 3555 3556 Notes: 3557 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3558 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3559 3560 .seealso: MatCreateMPIAIJWithSplitArrays() 3561 @*/ 3562 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3563 { 3564 PetscErrorCode ierr; 3565 Mat_MPIAIJ *maij; 3566 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3567 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3568 PetscScalar *oa=b->a; 3569 Mat Bnew; 3570 PetscInt m,n,N; 3571 3572 PetscFunctionBegin; 3573 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3574 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3575 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3576 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3577 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3578 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3579 3580 /* Get global columns of mat */ 3581 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3582 3583 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3584 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3585 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3586 maij = (Mat_MPIAIJ*)(*mat)->data; 3587 3588 (*mat)->preallocated = PETSC_TRUE; 3589 3590 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3591 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3592 3593 /* Set A as diagonal portion of *mat */ 3594 maij->A = A; 3595 3596 nz = oi[m]; 3597 for (i=0; i<nz; i++) { 3598 col = oj[i]; 3599 oj[i] = garray[col]; 3600 } 3601 3602 /* Set Bnew as off-diagonal portion of *mat */ 3603 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3604 bnew = (Mat_SeqAIJ*)Bnew->data; 3605 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3606 maij->B = Bnew; 3607 3608 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3609 3610 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3611 b->free_a = PETSC_FALSE; 3612 b->free_ij = PETSC_FALSE; 3613 ierr = MatDestroy(&B);CHKERRQ(ierr); 3614 3615 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3616 bnew->free_a = PETSC_TRUE; 3617 bnew->free_ij = PETSC_TRUE; 3618 3619 /* condense columns of maij->B */ 3620 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3621 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3622 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3623 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3624 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3625 PetscFunctionReturn(0); 3626 } 3627 3628 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3629 
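/*
   Illustrative call pattern (a sketch, not part of the library source): the MatCreateSubMatrix_MPIAIJ*()
   routines below are normally reached through the public MatCreateSubMatrix() interface, first with
   MAT_INITIAL_MATRIX and later with MAT_REUSE_MATRIX to refresh the same submatrix:

      ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ... change numerical values of A ...
      ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);

   A, S, isrow, and iscol here are caller-provided names; the index sets could be built with,
   for example, ISCreateStride() or ISCreateGeneral().
*/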
3630 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3631 { 3632 PetscErrorCode ierr; 3633 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3634 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3635 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3636 Mat M,Msub,B=a->B; 3637 MatScalar *aa; 3638 Mat_SeqAIJ *aij; 3639 PetscInt *garray = a->garray,*colsub,Ncols; 3640 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3641 IS iscol_sub,iscmap; 3642 const PetscInt *is_idx,*cmap; 3643 PetscBool allcolumns=PETSC_FALSE; 3644 MPI_Comm comm; 3645 3646 PetscFunctionBegin; 3647 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3648 3649 if (call == MAT_REUSE_MATRIX) { 3650 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3651 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3652 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3653 3654 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3655 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3656 3657 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3658 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3659 3660 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3661 3662 } else { /* call == MAT_INITIAL_MATRIX) */ 3663 PetscBool flg; 3664 3665 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3666 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3667 3668 /* (1) iscol -> nonscalable iscol_local */ 3669 /* Check for special case: each processor gets entire matrix columns */ 3670 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3671 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3672 if (allcolumns) { 3673 iscol_sub = iscol_local; 3674 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3675 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3676 3677 } else { 3678 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3679 PetscInt *idx,*cmap1,k; 3680 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3681 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3682 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3683 count = 0; 3684 k = 0; 3685 for (i=0; i<Ncols; i++) { 3686 j = is_idx[i]; 3687 if (j >= cstart && j < cend) { 3688 /* diagonal part of mat */ 3689 idx[count] = j; 3690 cmap1[count++] = i; /* column index in submat */ 3691 } else if (Bn) { 3692 /* off-diagonal part of mat */ 3693 if (j == garray[k]) { 3694 idx[count] = j; 3695 cmap1[count++] = i; /* column index in submat */ 3696 } else if (j > garray[k]) { 3697 while (j > garray[k] && k < Bn-1) k++; 3698 if (j == garray[k]) { 3699 idx[count] = j; 3700 cmap1[count++] = i; /* column index in submat */ 3701 } 3702 } 3703 } 3704 } 3705 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3706 3707 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3708 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3709 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3710 3711 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3712 } 3713 3714 /* (3) Create sequential Msub */ 3715 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3716 } 3717 3718 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3719 aij = (Mat_SeqAIJ*)(Msub)->data; 3720 ii = aij->i; 3721 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3722 3723 /* 3724 m - number of local rows 3725 Ncols - number of columns (same on all processors) 3726 rstart - first row in new global matrix generated 3727 */ 3728 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3729 3730 if (call == MAT_INITIAL_MATRIX) { 3731 /* (4) Create parallel newmat */ 3732 PetscMPIInt rank,size; 3733 PetscInt csize; 3734 3735 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3736 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3737 3738 /* 3739 Determine the number of non-zeros in the diagonal and off-diagonal 3740 portions of the matrix in order to do correct preallocation 3741 */ 3742 3743 /* first get start and end of "diagonal" columns */ 3744 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3745 if (csize == PETSC_DECIDE) { 3746 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3747 if (mglobal == Ncols) { /* square matrix */ 3748 nlocal = m; 3749 } else { 3750 nlocal = Ncols/size + ((Ncols % size) > rank); 3751 } 3752 } else { 3753 nlocal = csize; 3754 } 3755 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3756 rstart = rend - nlocal; 3757 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3758 3759 /* next, compute all the lengths */ 3760 jj = aij->j; 3761 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3762 olens = dlens + m; 3763 for (i=0; i<m; i++) { 3764 jend = ii[i+1] - ii[i]; 3765 olen = 0; 3766 dlen = 0; 3767 for (j=0; j<jend; j++) { 3768 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3769 else dlen++; 3770 jj++; 3771 } 3772 olens[i] = olen; 3773 dlens[i] = dlen; 3774 } 3775 3776 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3777 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3778 3779 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3780 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3781 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3782 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3783 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3784 ierr = PetscFree(dlens);CHKERRQ(ierr); 3785 3786 } else { /* call == MAT_REUSE_MATRIX */ 3787 M = *newmat; 3788 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3789 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3790 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3791 /* 3792 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3793 rather than the slower MatSetValues(). 3794 */ 3795 M->was_assembled = PETSC_TRUE; 3796 M->assembled = PETSC_FALSE; 3797 } 3798 3799 /* (5) Set values of Msub to *newmat */ 3800 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3801 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3802 3803 jj = aij->j; 3804 aa = aij->a; 3805 for (i=0; i<m; i++) { 3806 row = rstart + i; 3807 nz = ii[i+1] - ii[i]; 3808 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3809 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3810 jj += nz; aa += nz; 3811 } 3812 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3813 3814 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3815 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3816 3817 ierr = PetscFree(colsub);CHKERRQ(ierr); 3818 3819 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3820 if (call == MAT_INITIAL_MATRIX) { 3821 *newmat = M; 3822 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3823 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3824 3825 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3826 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3827 3828 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3829 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3830 3831 if (iscol_local) { 3832 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3833 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3834 } 3835 } 3836 PetscFunctionReturn(0); 3837 } 3838 3839 /* 3840 Not great since it makes two copies of the submatrix, first an SeqAIJ 3841 in local and then by concatenating the local matrices the end result. 3842 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3843 3844 Note: This requires a sequential iscol with all indices. 
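   Such a sequential iscol is what ISGetSeqIS_Private() above produces (via ISAllGather() in the general case).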
3845 */ 3846 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3847 { 3848 PetscErrorCode ierr; 3849 PetscMPIInt rank,size; 3850 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3851 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3852 Mat M,Mreuse; 3853 MatScalar *aa,*vwork; 3854 MPI_Comm comm; 3855 Mat_SeqAIJ *aij; 3856 PetscBool colflag,allcolumns=PETSC_FALSE; 3857 3858 PetscFunctionBegin; 3859 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3860 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3861 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3862 3863 /* Check for special case: each processor gets entire matrix columns */ 3864 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3865 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3866 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3867 3868 if (call == MAT_REUSE_MATRIX) { 3869 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3870 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3871 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3872 } else { 3873 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3874 } 3875 3876 /* 3877 m - number of local rows 3878 n - number of columns (same on all processors) 3879 rstart - first row in new global matrix generated 3880 */ 3881 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3882 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3883 if (call == MAT_INITIAL_MATRIX) { 3884 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3885 ii = aij->i; 3886 jj = aij->j; 3887 3888 /* 3889 Determine the number of non-zeros in the diagonal and off-diagonal 3890 portions of the matrix in order to do correct preallocation 3891 */ 3892 3893 /* first get start and end of "diagonal" columns */ 3894 if (csize == PETSC_DECIDE) { 3895 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3896 if (mglobal == n) { /* square matrix */ 3897 nlocal = m; 3898 } else { 3899 nlocal = n/size + ((n % size) > rank); 3900 } 3901 } else { 3902 nlocal = csize; 3903 } 3904 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3905 rstart = rend - nlocal; 3906 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3907 3908 /* next, compute all the lengths */ 3909 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3910 olens = dlens + m; 3911 for (i=0; i<m; i++) { 3912 jend = ii[i+1] - ii[i]; 3913 olen = 0; 3914 dlen = 0; 3915 for (j=0; j<jend; j++) { 3916 if (*jj < rstart || *jj >= rend) olen++; 3917 else dlen++; 3918 jj++; 3919 } 3920 olens[i] = olen; 3921 dlens[i] = dlen; 3922 } 3923 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3924 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3925 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3926 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3927 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3928 ierr = PetscFree(dlens);CHKERRQ(ierr); 3929 } else { 3930 PetscInt ml,nl; 3931 3932 M = *newmat; 3933 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3934 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3935 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3936 /* 3937 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3938 rather than the slower MatSetValues(). 3939 */ 3940 M->was_assembled = PETSC_TRUE; 3941 M->assembled = PETSC_FALSE; 3942 } 3943 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3944 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3945 ii = aij->i; 3946 jj = aij->j; 3947 aa = aij->a; 3948 for (i=0; i<m; i++) { 3949 row = rstart + i; 3950 nz = ii[i+1] - ii[i]; 3951 cwork = jj; jj += nz; 3952 vwork = aa; aa += nz; 3953 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3954 } 3955 3956 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3957 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3958 *newmat = M; 3959 3960 /* save submatrix used in processor for next request */ 3961 if (call == MAT_INITIAL_MATRIX) { 3962 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3963 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3964 } 3965 PetscFunctionReturn(0); 3966 } 3967 3968 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3969 { 3970 PetscInt m,cstart, cend,j,nnz,i,d; 3971 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3972 const PetscInt *JJ; 3973 PetscErrorCode ierr; 3974 PetscBool nooffprocentries; 3975 3976 PetscFunctionBegin; 3977 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3978 3979 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3980 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3981 m = B->rmap->n; 3982 cstart = B->cmap->rstart; 3983 cend = B->cmap->rend; 3984 rstart = B->rmap->rstart; 3985 3986 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3987 3988 #if defined(PETSC_USE_DEBUG) 3989 for (i=0; i<m; i++) { 3990 nnz = Ii[i+1]- Ii[i]; 3991 JJ = J + Ii[i]; 3992 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3993 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3994 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3995 } 3996 #endif 3997 3998 for (i=0; i<m; i++) { 3999 nnz = Ii[i+1]- Ii[i]; 4000 JJ = J + Ii[i]; 4001 nnz_max = PetscMax(nnz_max,nnz); 4002 d = 0; 4003 for (j=0; j<nnz; j++) { 4004 if (cstart <= JJ[j] && JJ[j] < cend) d++; 4005 } 4006 d_nnz[i] = d; 4007 o_nnz[i] = nnz - d; 4008 } 4009 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 4010 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 4011 4012 for (i=0; i<m; i++) { 4013 ii = i + rstart; 4014 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 4015 } 4016 nooffprocentries = B->nooffprocentries; 4017 B->nooffprocentries = PETSC_TRUE; 4018 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4019 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4020 B->nooffprocentries = nooffprocentries; 4021 4022 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4023 PetscFunctionReturn(0); 4024 } 4025 4026 /*@ 4027 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4028 (the default parallel PETSc format). 
4029 4030 Collective 4031 4032 Input Parameters: 4033 + B - the matrix 4034 . i - the indices into j for the start of each local row (starts with zero) 4035 . j - the column indices for each local row (starts with zero) 4036 - v - optional values in the matrix 4037 4038 Level: developer 4039 4040 Notes: 4041 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 4042 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4043 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4044 4045 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4046 4047 The format which is used for the sparse matrix input, is equivalent to a 4048 row-major ordering.. i.e for the following matrix, the input data expected is 4049 as shown 4050 4051 $ 1 0 0 4052 $ 2 0 3 P0 4053 $ ------- 4054 $ 4 5 6 P1 4055 $ 4056 $ Process0 [P0]: rows_owned=[0,1] 4057 $ i = {0,1,3} [size = nrow+1 = 2+1] 4058 $ j = {0,0,2} [size = 3] 4059 $ v = {1,2,3} [size = 3] 4060 $ 4061 $ Process1 [P1]: rows_owned=[2] 4062 $ i = {0,3} [size = nrow+1 = 1+1] 4063 $ j = {0,1,2} [size = 3] 4064 $ v = {4,5,6} [size = 3] 4065 4066 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4067 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4068 @*/ 4069 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4070 { 4071 PetscErrorCode ierr; 4072 4073 PetscFunctionBegin; 4074 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4075 PetscFunctionReturn(0); 4076 } 4077 4078 /*@C 4079 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4080 (the default parallel PETSc format). For good matrix assembly performance 4081 the user should preallocate the matrix storage by setting the parameters 4082 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4083 performance can be increased by more than a factor of 50. 4084 4085 Collective 4086 4087 Input Parameters: 4088 + B - the matrix 4089 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4090 (same value is used for all local rows) 4091 . d_nnz - array containing the number of nonzeros in the various rows of the 4092 DIAGONAL portion of the local submatrix (possibly different for each row) 4093 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4094 The size of this array is equal to the number of local rows, i.e 'm'. 4095 For matrices that will be factored, you must leave room for (and set) 4096 the diagonal entry even if it is zero. 4097 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4098 submatrix (same value is used for all local rows). 4099 - o_nnz - array containing the number of nonzeros in the various rows of the 4100 OFF-DIAGONAL portion of the local submatrix (possibly different for 4101 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4102 structure. The size of this array is equal to the number 4103 of local rows, i.e 'm'. 
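   A minimal call sequence looks like the following (a sketch; the communicator, the sizes, and the fill estimates 3 and 2 are illustrative only and must be chosen by the caller):
$     MatCreate(comm,&B);
$     MatSetType(B,MATMPIAIJ);
$     MatSetSizes(B,m,n,M,N);
$     MatMPIAIJSetPreallocation(B,3,NULL,2,NULL);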
4104 4105 If the *_nnz parameter is given then the *_nz parameter is ignored 4106 4107 The AIJ format (also called the Yale sparse matrix format or 4108 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4109 storage. The stored row and column indices begin with zero. 4110 See Users-Manual: ch_mat for details. 4111 4112 The parallel matrix is partitioned such that the first m0 rows belong to 4113 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4114 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 4115 4116 The DIAGONAL portion of the local submatrix of a processor can be defined 4117 as the submatrix which is obtained by extracting the part corresponding to 4118 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4119 first row that belongs to the processor, r2 is the last row belonging to 4120 this processor, and c1-c2 is the range of indices of the local part of a 4121 vector suitable for applying the matrix to. This is an mxn matrix. In the 4122 common case of a square matrix, the row and column ranges are the same and 4123 the DIAGONAL part is also square. The remaining portion of the local 4124 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 4125 4126 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4127 4128 You can call MatGetInfo() to get information on how effective the preallocation was; 4129 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4130 You can also run with the option -info and look for messages with the string 4131 malloc in them to see if additional memory allocation was needed. 4132 4133 Example usage: 4134 4135 Consider the following 8x8 matrix with 34 non-zero values, that is 4136 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4137 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4138 as follows: 4139 4140 .vb 4141 1 2 0 | 0 3 0 | 0 4 4142 Proc0 0 5 6 | 7 0 0 | 8 0 4143 9 0 10 | 11 0 0 | 12 0 4144 ------------------------------------- 4145 13 0 14 | 15 16 17 | 0 0 4146 Proc1 0 18 0 | 19 20 21 | 0 0 4147 0 0 0 | 22 23 0 | 24 0 4148 ------------------------------------- 4149 Proc2 25 26 27 | 0 0 28 | 29 0 4150 30 0 0 | 31 32 33 | 0 34 4151 .ve 4152 4153 This can be represented as a collection of submatrices as: 4154 4155 .vb 4156 A B C 4157 D E F 4158 G H I 4159 .ve 4160 4161 Where the submatrices A,B,C are owned by proc0, D,E,F are 4162 owned by proc1, G,H,I are owned by proc2. 4163 4164 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4165 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4166 The 'M','N' parameters are 8,8, and have the same values on all procs. 4167 4168 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4169 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4170 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4171 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4172 part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ 4173 matrix, and [DF] as another SeqAIJ matrix. 4174 4175 When d_nz, o_nz parameters are specified, d_nz storage elements are 4176 allocated for every row of the local diagonal submatrix, and o_nz 4177 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4178 One way to choose d_nz and o_nz is to use the max nonzeros per local 4179 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4180 In this case, the values of d_nz,o_nz are: 4181 .vb 4182 proc0 : dnz = 2, o_nz = 2 4183 proc1 : dnz = 3, o_nz = 2 4184 proc2 : dnz = 1, o_nz = 4 4185 .ve 4186 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4187 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4188 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4189 34 values. 4190 4191 When d_nnz, o_nnz parameters are specified, the storage is specified 4192 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4193 In the above case the values for d_nnz,o_nnz are: 4194 .vb 4195 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4196 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4197 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4198 .ve 4199 Here the space allocated is the sum of all the above values, i.e. 34, and 4200 hence the preallocation is perfect. 4201 4202 Level: intermediate 4203 4204 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4205 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4206 @*/ 4207 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4208 { 4209 PetscErrorCode ierr; 4210 4211 PetscFunctionBegin; 4212 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4213 PetscValidType(B,1); 4214 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4215 PetscFunctionReturn(0); 4216 } 4217 4218 /*@ 4219 MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows in standard 4220 CSR format. 4221 4222 Collective 4223 4224 Input Parameters: 4225 + comm - MPI communicator 4226 . m - number of local rows (Cannot be PETSC_DECIDE) 4227 . n - This value should be the same as the local size used in creating the 4228 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4229 calculated if N is given) For square matrices n is almost always m. 4230 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4231 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4232 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4233 . j - column indices 4234 - a - matrix values 4235 4236 Output Parameter: 4237 . mat - the matrix 4238 4239 Level: intermediate 4240 4241 Notes: 4242 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4243 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4244 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4245 4246 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4247 4248 The format which is used for the sparse matrix input is equivalent to a 4249 row-major ordering,
i.e. for the following matrix, the input data expected is 4250 as shown 4251 4252 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays() 4253 4254 $ 1 0 0 4255 $ 2 0 3 P0 4256 $ ------- 4257 $ 4 5 6 P1 4258 $ 4259 $ Process0 [P0]: rows_owned=[0,1] 4260 $ i = {0,1,3} [size = nrow+1 = 2+1] 4261 $ j = {0,0,2} [size = 3] 4262 $ v = {1,2,3} [size = 3] 4263 $ 4264 $ Process1 [P1]: rows_owned=[2] 4265 $ i = {0,3} [size = nrow+1 = 1+1] 4266 $ j = {0,1,2} [size = 3] 4267 $ v = {4,5,6} [size = 3] 4268 4269 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4270 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4271 @*/ 4272 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4273 { 4274 PetscErrorCode ierr; 4275 4276 PetscFunctionBegin; 4277 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4278 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4279 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4280 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4281 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4282 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4283 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4284 PetscFunctionReturn(0); 4285 } 4286 4287 /*@ 4288 MatUpdateMPIAIJWithArrays - updates an MPIAIJ matrix using arrays that contain the local rows in standard 4289 CSR format. Only the numerical values are updated; the other arrays must be identical to those used when the matrix was created 4290 4291 Collective 4292 4293 Input Parameters: 4294 + mat - the matrix 4295 . m - number of local rows (Cannot be PETSC_DECIDE) 4296 . n - This value should be the same as the local size used in creating the 4297 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4298 calculated if N is given) For square matrices n is almost always m. 4299 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4300 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4301 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4302 . 
J - column indices 4303 - v - matrix values 4304 4305 Level: intermediate 4306 4307 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4308 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4309 @*/ 4310 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4311 { 4312 PetscErrorCode ierr; 4313 PetscInt cstart,nnz,i,j; 4314 PetscInt *ld; 4315 PetscBool nooffprocentries; 4316 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4317 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4318 PetscScalar *ad = Ad->a, *ao = Ao->a; 4319 const PetscInt *Adi = Ad->i; 4320 PetscInt ldi,Iii,md; 4321 4322 PetscFunctionBegin; 4323 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii (row indices) must start with 0"); 4324 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4325 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4326 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4327 4328 cstart = mat->cmap->rstart; 4329 if (!Aij->ld) { 4330 /* count number of entries below block diagonal */ 4331 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4332 Aij->ld = ld; 4333 for (i=0; i<m; i++) { 4334 nnz = Ii[i+1]- Ii[i]; 4335 j = 0; 4336 while (j < nnz && J[j] < cstart) {j++;} /* check j < nnz before reading J[j] so we never index past this row's entries */ 4337 J += nnz; 4338 ld[i] = j; 4339 } 4340 } else { 4341 ld = Aij->ld; 4342 } 4343 4344 for (i=0; i<m; i++) { 4345 nnz = Ii[i+1]- Ii[i]; 4346 Iii = Ii[i]; 4347 ldi = ld[i]; 4348 md = Adi[i+1]-Adi[i]; 4349 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4350 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4351 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4352 ad += md; 4353 ao += nnz - md; 4354 } 4355 nooffprocentries = mat->nooffprocentries; 4356 mat->nooffprocentries = PETSC_TRUE; 4357 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4358 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4359 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4360 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4361 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4362 mat->nooffprocentries = nooffprocentries; 4363 PetscFunctionReturn(0); 4364 } 4365 4366 /*@C 4367 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4368 (the default parallel PETSc format). For good matrix assembly performance 4369 the user should preallocate the matrix storage by setting the parameters 4370 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4371 performance can be increased by more than a factor of 50. 4372 4373 Collective 4374 4375 Input Parameters: 4376 + comm - MPI communicator 4377 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4378 This value should be the same as the local size used in creating the 4379 y vector for the matrix-vector product y = Ax. 4380 . n - This value should be the same as the local size used in creating the 4381 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4382 calculated if N is given) For square matrices n is almost always m. 4383 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4384 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4385 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4386 (same value is used for all local rows) 4387 . d_nnz - array containing the number of nonzeros in the various rows of the 4388 DIAGONAL portion of the local submatrix (possibly different for each row) 4389 or NULL, if d_nz is used to specify the nonzero structure. 4390 The size of this array is equal to the number of local rows, i.e 'm'. 4391 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4392 submatrix (same value is used for all local rows). 4393 - o_nnz - array containing the number of nonzeros in the various rows of the 4394 OFF-DIAGONAL portion of the local submatrix (possibly different for 4395 each row) or NULL, if o_nz is used to specify the nonzero 4396 structure. The size of this array is equal to the number 4397 of local rows, i.e 'm'. 4398 4399 Output Parameter: 4400 . A - the matrix 4401 4402 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4403 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4404 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4405 4406 Notes: 4407 If the *_nnz parameter is given then the *_nz parameter is ignored 4408 4409 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4410 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4411 storage requirements for this matrix. 4412 4413 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4414 processor than it must be used on all processors that share the object for 4415 that argument. 4416 4417 The user MUST specify either the local or global matrix dimensions 4418 (possibly both). 4419 4420 The parallel matrix is partitioned across processors such that the 4421 first m0 rows belong to process 0, the next m1 rows belong to 4422 process 1, the next m2 rows belong to process 2 etc.. where 4423 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4424 values corresponding to [m x N] submatrix. 4425 4426 The columns are logically partitioned with the n0 columns belonging 4427 to 0th partition, the next n1 columns belonging to the next 4428 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4429 4430 The DIAGONAL portion of the local submatrix on any given processor 4431 is the submatrix corresponding to the rows and columns m,n 4432 corresponding to the given processor. i.e diagonal matrix on 4433 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4434 etc. The remaining portion of the local submatrix [m x (N-n)] 4435 constitute the OFF-DIAGONAL portion. The example below better 4436 illustrates this concept. 4437 4438 For a square global matrix we define each processor's diagonal portion 4439 to be its local rows and the corresponding columns (a square submatrix); 4440 each processor's off-diagonal portion encompasses the remainder of the 4441 local matrix (a rectangular submatrix). 4442 4443 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4444 4445 When calling this routine with a single process communicator, a matrix of 4446 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the pre-allocation is perfect. 
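
   As a further illustration (a minimal sketch only, with assembly, error checking, and the
   matching collective calls on the other processes omitted; the variable name C is arbitrary),
   process 1 of the example above could create the matrix with exact per-row preallocation as

.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat      C;
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&C);
.ve

   Each process passes its own local sizes and nnz arrays; since d_nnz and o_nnz are
   provided, the d_nz and o_nz arguments (here 0) are ignored.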
4535 4536 Level: intermediate 4537 4538 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4539 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4540 @*/ 4541 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4542 { 4543 PetscErrorCode ierr; 4544 PetscMPIInt size; 4545 4546 PetscFunctionBegin; 4547 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4548 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4549 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4550 if (size > 1) { 4551 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4552 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4553 } else { 4554 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4555 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4556 } 4557 PetscFunctionReturn(0); 4558 } 4559 4560 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4561 { 4562 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4563 PetscBool flg; 4564 PetscErrorCode ierr; 4565 4566 PetscFunctionBegin; 4567 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4568 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4569 if (Ad) *Ad = a->A; 4570 if (Ao) *Ao = a->B; 4571 if (colmap) *colmap = a->garray; 4572 PetscFunctionReturn(0); 4573 } 4574 4575 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4576 { 4577 PetscErrorCode ierr; 4578 PetscInt m,N,i,rstart,nnz,Ii; 4579 PetscInt *indx; 4580 PetscScalar *values; 4581 4582 PetscFunctionBegin; 4583 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4584 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4585 PetscInt *dnz,*onz,sum,bs,cbs; 4586 4587 if (n == PETSC_DECIDE) { 4588 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4589 } 4590 /* Check sum(n) = N */ 4591 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4592 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4593 4594 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4595 rstart -= m; 4596 4597 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4598 for (i=0; i<m; i++) { 4599 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4600 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4601 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4602 } 4603 4604 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4605 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4606 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4607 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4608 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4609 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4610 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4611 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4612 } 4613 4614 /* numeric phase */ 4615 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4616 for (i=0; i<m; i++) { 4617 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4618 Ii = i + rstart; 4619 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4620 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4621 } 4622 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4623 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4624 PetscFunctionReturn(0); 4625 } 4626 4627 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4628 { 4629 PetscErrorCode ierr; 4630 PetscMPIInt rank; 4631 PetscInt m,N,i,rstart,nnz; 4632 size_t len; 4633 const PetscInt *indx; 4634 PetscViewer out; 4635 char *name; 4636 Mat B; 4637 const PetscScalar *values; 4638 4639 PetscFunctionBegin; 4640 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4641 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4642 /* Should this be the type of the diagonal block of A? */ 4643 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4644 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4645 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4646 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4647 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4648 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4649 for (i=0; i<m; i++) { 4650 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4651 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4652 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4653 } 4654 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4655 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4656 4657 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4658 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4659 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4660 sprintf(name,"%s.%d",outfile,rank); 4661 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4662 ierr = PetscFree(name);CHKERRQ(ierr); 4663 ierr = MatView(B,out);CHKERRQ(ierr); 4664 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4665 ierr = MatDestroy(&B);CHKERRQ(ierr); 4666 PetscFunctionReturn(0); 4667 } 4668 4669 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4670 { 4671 PetscErrorCode ierr; 4672 Mat_Merge_SeqsToMPI *merge; 4673 PetscContainer container; 4674 4675 PetscFunctionBegin; 4676 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4677 if (container) { 4678 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4679 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4680 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4681 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4682 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4683 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4684 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4685 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4686 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4687 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4688 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4689 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4690 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4691 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4692 ierr = PetscFree(merge);CHKERRQ(ierr); 4693 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4694 } 4695 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4696 PetscFunctionReturn(0); 4697 } 4698 4699 #include <../src/mat/utils/freespace.h> 4700 #include <petscbt.h> 4701 4702 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4703 { 4704 PetscErrorCode ierr; 4705 MPI_Comm comm; 4706 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4707 PetscMPIInt 
size,rank,taga,*len_s; 4708 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4709 PetscInt proc,m; 4710 PetscInt **buf_ri,**buf_rj; 4711 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4712 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4713 MPI_Request *s_waits,*r_waits; 4714 MPI_Status *status; 4715 MatScalar *aa=a->a; 4716 MatScalar **abuf_r,*ba_i; 4717 Mat_Merge_SeqsToMPI *merge; 4718 PetscContainer container; 4719 4720 PetscFunctionBegin; 4721 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4722 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4723 4724 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4725 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4726 4727 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4728 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4729 4730 bi = merge->bi; 4731 bj = merge->bj; 4732 buf_ri = merge->buf_ri; 4733 buf_rj = merge->buf_rj; 4734 4735 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4736 owners = merge->rowmap->range; 4737 len_s = merge->len_s; 4738 4739 /* send and recv matrix values */ 4740 /*-----------------------------*/ 4741 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4742 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4743 4744 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4745 for (proc=0,k=0; proc<size; proc++) { 4746 if (!len_s[proc]) continue; 4747 i = owners[proc]; 4748 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4749 k++; 4750 } 4751 4752 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4753 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4754 ierr = PetscFree(status);CHKERRQ(ierr); 4755 4756 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4757 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4758 4759 /* insert mat values of mpimat */ 4760 /*----------------------------*/ 4761 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4762 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4763 4764 for (k=0; k<merge->nrecv; k++) { 4765 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4766 nrows = *(buf_ri_k[k]); 4767 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4768 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4769 } 4770 4771 /* set values of ba */ 4772 m = merge->rowmap->n; 4773 for (i=0; i<m; i++) { 4774 arow = owners[rank] + i; 4775 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4776 bnzi = bi[i+1] - bi[i]; 4777 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4778 4779 /* add local non-zero vals of this proc's seqmat into ba */ 4780 anzi = ai[arow+1] - ai[arow]; 4781 aj = a->j + ai[arow]; 4782 aa = a->a + ai[arow]; 4783 nextaj = 0; 4784 for (j=0; nextaj<anzi; j++) { 4785 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4786 ba_i[j] += aa[nextaj++]; 4787 } 4788 } 4789 4790 /* add received vals into ba */ 4791 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4792 /* i-th row */ 4793 if (i == *nextrow[k]) { 4794 anzi = *(nextai[k]+1) - *nextai[k]; 4795 aj = buf_rj[k] + *(nextai[k]); 4796 aa = abuf_r[k] + *(nextai[k]); 4797 nextaj = 0; 4798 for (j=0; nextaj<anzi; j++) { 4799 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4800 
ba_i[j] += aa[nextaj++]; 4801 } 4802 } 4803 nextrow[k]++; nextai[k]++; 4804 } 4805 } 4806 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4807 } 4808 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4809 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4810 4811 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4812 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4813 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4814 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4815 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4816 PetscFunctionReturn(0); 4817 } 4818 4819 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4820 { 4821 PetscErrorCode ierr; 4822 Mat B_mpi; 4823 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4824 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4825 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4826 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4827 PetscInt len,proc,*dnz,*onz,bs,cbs; 4828 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4829 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4830 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4831 MPI_Status *status; 4832 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4833 PetscBT lnkbt; 4834 Mat_Merge_SeqsToMPI *merge; 4835 PetscContainer container; 4836 4837 PetscFunctionBegin; 4838 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4839 4840 /* make sure it is a PETSc comm */ 4841 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4842 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4843 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4844 4845 ierr = PetscNew(&merge);CHKERRQ(ierr); 4846 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4847 4848 /* determine row ownership */ 4849 /*---------------------------------------------------------*/ 4850 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4851 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4852 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4853 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4854 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4855 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4856 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4857 4858 m = merge->rowmap->n; 4859 owners = merge->rowmap->range; 4860 4861 /* determine the number of messages to send, their lengths */ 4862 /*---------------------------------------------------------*/ 4863 len_s = merge->len_s; 4864 4865 len = 0; /* length of buf_si[] */ 4866 merge->nsend = 0; 4867 for (proc=0; proc<size; proc++) { 4868 len_si[proc] = 0; 4869 if (proc == rank) { 4870 len_s[proc] = 0; 4871 } else { 4872 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4873 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4874 } 4875 if (len_s[proc]) { 4876 merge->nsend++; 4877 nrows = 0; 4878 for (i=owners[proc]; i<owners[proc+1]; i++) { 4879 if (ai[i+1] > ai[i]) nrows++; 4880 } 4881 len_si[proc] = 2*(nrows+1); 4882 len += len_si[proc]; 4883 } 4884 } 4885 4886 /* determine the number and length of messages to receive for ij-structure */ 4887 /*-------------------------------------------------------------------------*/ 4888 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4889 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4890 4891 /* post the Irecv of j-structure */ 4892 /*-------------------------------*/ 4893 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4894 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4895 4896 /* post the Isend of j-structure */ 4897 /*--------------------------------*/ 4898 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4899 4900 for (proc=0, k=0; proc<size; proc++) { 4901 if (!len_s[proc]) continue; 4902 i = owners[proc]; 4903 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4904 k++; 4905 } 4906 4907 /* receives and sends of j-structure are complete */ 4908 /*------------------------------------------------*/ 4909 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4910 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4911 4912 /* send and recv i-structure */ 4913 /*---------------------------*/ 4914 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4915 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4916 4917 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4918 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4919 for (proc=0,k=0; proc<size; proc++) { 4920 if (!len_s[proc]) continue; 4921 /* form outgoing message for i-structure: 4922 buf_si[0]: nrows to be sent 4923 [1:nrows]: row index (global) 4924 [nrows+1:2*nrows+1]: i-structure index 4925 */ 4926 /*-------------------------------------------*/ 4927 nrows = len_si[proc]/2 - 1; 4928 buf_si_i = buf_si + nrows+1; 4929 buf_si[0] = nrows; 4930 buf_si_i[0] = 0; 4931 nrows = 0; 4932 for (i=owners[proc]; i<owners[proc+1]; i++) { 4933 anzi = ai[i+1] - ai[i]; 4934 if (anzi) { 4935 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4936 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4937 nrows++; 4938 } 4939 } 4940 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4941 k++; 4942 buf_si += len_si[proc]; 4943 } 4944 4945 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4946 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4947 4948 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4949 for (i=0; i<merge->nrecv; i++) { 4950 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4951 } 4952 4953 ierr = PetscFree(len_si);CHKERRQ(ierr); 4954 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4955 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4956 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4957 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4958 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4959 ierr = PetscFree(status);CHKERRQ(ierr); 4960 4961 /* compute a local seq matrix in each processor */ 4962 /*----------------------------------------------*/ 4963 /* allocate bi array and free space for accumulating nonzero column info */ 4964 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4965 bi[0] = 0; 4966 4967 /* create and initialize a linked list */ 4968 nlnk = N+1; 4969 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4970 4971 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4972 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4973 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4974 4975 current_space = free_space; 4976 4977 /* determine symbolic info for each local row */ 4978 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4979 4980 for (k=0; k<merge->nrecv; k++) { 4981 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4982 nrows = *buf_ri_k[k]; 4983 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4984 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4985 } 4986 4987 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4988 len = 0; 4989 for (i=0; i<m; i++) { 4990 bnzi = 0; 4991 /* add local non-zero cols of this proc's seqmat into lnk */ 4992 arow = owners[rank] + i; 4993 anzi = ai[arow+1] - ai[arow]; 4994 aj = a->j + ai[arow]; 4995 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4996 bnzi += nlnk; 4997 /* add received col data into lnk */ 4998 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4999 if (i == *nextrow[k]) { /* i-th row */ 5000 anzi = *(nextai[k]+1) - *nextai[k]; 5001 aj = buf_rj[k] + *nextai[k]; 5002 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5003 bnzi += nlnk; 5004 nextrow[k]++; nextai[k]++; 5005 } 5006 } 5007 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5008 5009 /* if free space is not available, make more free space */ 5010 if (current_space->local_remaining<bnzi) { 5011 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 5012 nspacedouble++; 5013 } 5014 /* copy data into free space, then initialize lnk */ 5015 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5016 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5017 5018 current_space->array += bnzi; 5019 current_space->local_used += bnzi; 5020 current_space->local_remaining -= bnzi; 5021 5022 bi[i+1] = bi[i] + bnzi; 5023 } 5024 5025 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5026 5027 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 5028 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5029 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5030 5031 /* create symbolic parallel matrix B_mpi */ 5032 /*---------------------------------------*/ 5033 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5034 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5035 if (n==PETSC_DECIDE) { 5036 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5037 } else { 5038 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5039 } 5040 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5041 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5042 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5043 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5044 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5045 5046 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5047 B_mpi->assembled = PETSC_FALSE; 5048 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5049 merge->bi = bi; 5050 merge->bj = bj; 5051 merge->buf_ri = buf_ri; 5052 merge->buf_rj = buf_rj; 5053 merge->coi = NULL; 5054 merge->coj = NULL; 5055 merge->owners_co = NULL; 5056 5057 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5058 5059 /* attach the 
supporting struct to B_mpi for reuse */ 5060 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5061 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5062 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5063 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5064 *mpimat = B_mpi; 5065 5066 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5067 PetscFunctionReturn(0); 5068 } 5069 5070 /*@C 5071 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5072 matrices from each processor 5073 5074 Collective 5075 5076 Input Parameters: 5077 + comm - the communicators the parallel matrix will live on 5078 . seqmat - the input sequential matrices 5079 . m - number of local rows (or PETSC_DECIDE) 5080 . n - number of local columns (or PETSC_DECIDE) 5081 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5082 5083 Output Parameter: 5084 . mpimat - the parallel matrix generated 5085 5086 Level: advanced 5087 5088 Notes: 5089 The dimensions of the sequential matrix in each processor MUST be the same. 5090 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5091 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5092 @*/ 5093 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5094 { 5095 PetscErrorCode ierr; 5096 PetscMPIInt size; 5097 5098 PetscFunctionBegin; 5099 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5100 if (size == 1) { 5101 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5102 if (scall == MAT_INITIAL_MATRIX) { 5103 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5104 } else { 5105 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5106 } 5107 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5108 PetscFunctionReturn(0); 5109 } 5110 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5111 if (scall == MAT_INITIAL_MATRIX) { 5112 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5113 } 5114 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5115 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5116 PetscFunctionReturn(0); 5117 } 5118 5119 /*@ 5120 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5121 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5122 with MatGetSize() 5123 5124 Not Collective 5125 5126 Input Parameters: 5127 + A - the matrix 5128 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5129 5130 Output Parameter: 5131 . A_loc - the local sequential matrix generated 5132 5133 Level: developer 5134 5135 Notes: 5136 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5137 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5138 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5139 modify the values of the returned A_loc. 
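
   A typical call sequence is sketched below (error checking omitted; A_loc is an arbitrary
   variable name):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     ... use, or safely modify the values of, A_loc ...
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve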
5140 5141 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5142 5143 @*/ 5144 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5145 { 5146 PetscErrorCode ierr; 5147 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5148 Mat_SeqAIJ *mat,*a,*b; 5149 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5150 MatScalar *aa,*ba,*cam; 5151 PetscScalar *ca; 5152 PetscMPIInt size; 5153 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5154 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5155 PetscBool match; 5156 5157 PetscFunctionBegin; 5158 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5159 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5160 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 5161 if (size == 1) { 5162 if (scall == MAT_INITIAL_MATRIX) { 5163 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5164 *A_loc = mpimat->A; 5165 } else if (scall == MAT_REUSE_MATRIX) { 5166 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5167 } 5168 PetscFunctionReturn(0); 5169 } 5170 5171 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5172 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5173 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5174 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5175 aa = a->a; ba = b->a; 5176 if (scall == MAT_INITIAL_MATRIX) { 5177 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5178 ci[0] = 0; 5179 for (i=0; i<am; i++) { 5180 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5181 } 5182 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5183 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5184 k = 0; 5185 for (i=0; i<am; i++) { 5186 ncols_o = bi[i+1] - bi[i]; 5187 ncols_d = ai[i+1] - ai[i]; 5188 /* off-diagonal portion of A */ 5189 for (jo=0; jo<ncols_o; jo++) { 5190 col = cmap[*bj]; 5191 if (col >= cstart) break; 5192 cj[k] = col; bj++; 5193 ca[k++] = *ba++; 5194 } 5195 /* diagonal portion of A */ 5196 for (j=0; j<ncols_d; j++) { 5197 cj[k] = cstart + *aj++; 5198 ca[k++] = *aa++; 5199 } 5200 /* off-diagonal portion of A */ 5201 for (j=jo; j<ncols_o; j++) { 5202 cj[k] = cmap[*bj++]; 5203 ca[k++] = *ba++; 5204 } 5205 } 5206 /* put together the new matrix */ 5207 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5208 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5209 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5210 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5211 mat->free_a = PETSC_TRUE; 5212 mat->free_ij = PETSC_TRUE; 5213 mat->nonew = 0; 5214 } else if (scall == MAT_REUSE_MATRIX) { 5215 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5216 ci = mat->i; cj = mat->j; cam = mat->a; 5217 for (i=0; i<am; i++) { 5218 /* off-diagonal portion of A */ 5219 ncols_o = bi[i+1] - bi[i]; 5220 for (jo=0; jo<ncols_o; jo++) { 5221 col = cmap[*bj]; 5222 if (col >= cstart) break; 5223 *cam++ = *ba++; bj++; 5224 } 5225 /* diagonal portion of A */ 5226 ncols_d = ai[i+1] - ai[i]; 5227 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5228 /* off-diagonal portion of A */ 5229 for (j=jo; j<ncols_o; j++) { 5230 *cam++ = *ba++; bj++; 5231 } 5232 } 5233 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5234 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5235 PetscFunctionReturn(0); 5236 } 5237 5238 /*@C 5239 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5240 5241 Not Collective 5242 5243 Input Parameters: 5244 + A - the matrix 5245 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5246 - row, col - index sets of rows and columns to extract (or NULL) 5247 5248 Output Parameter: 5249 . A_loc - the local sequential matrix generated 5250 5251 Level: developer 5252 5253 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5254 5255 @*/ 5256 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5257 { 5258 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5259 PetscErrorCode ierr; 5260 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5261 IS isrowa,iscola; 5262 Mat *aloc; 5263 PetscBool match; 5264 5265 PetscFunctionBegin; 5266 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5267 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5268 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5269 if (!row) { 5270 start = A->rmap->rstart; end = A->rmap->rend; 5271 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5272 } else { 5273 isrowa = *row; 5274 } 5275 if (!col) { 5276 start = A->cmap->rstart; 5277 cmap = a->garray; 5278 nzA = a->A->cmap->n; 5279 nzB = a->B->cmap->n; 5280 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5281 ncols = 0; 5282 for (i=0; i<nzB; i++) { 5283 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5284 else break; 5285 } 5286 imark = i; 5287 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5288 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5289 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5290 } else { 5291 iscola = *col; 5292 } 5293 if (scall != MAT_INITIAL_MATRIX) { 5294 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5295 aloc[0] = *A_loc; 5296 } 5297 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5298 if (!col) { /* attach global id of condensed columns */ 5299 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5300 } 5301 *A_loc = aloc[0]; 5302 ierr = PetscFree(aloc);CHKERRQ(ierr); 5303 if (!row) { 5304 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5305 } 5306 if (!col) { 5307 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5308 } 5309 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5310 PetscFunctionReturn(0); 5311 } 5312 5313 /* 5314 * Destroy a mat that may be 
composed with PetscSF communication objects. 5315 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5316 * */ 5317 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5318 { 5319 PetscSF sf,osf; 5320 IS map; 5321 PetscErrorCode ierr; 5322 5323 PetscFunctionBegin; 5324 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5325 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5326 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5327 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5328 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5329 ierr = ISDestroy(&map);CHKERRQ(ierr); 5330 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5331 PetscFunctionReturn(0); 5332 } 5333 5334 /* 5335 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5336 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5337 * on a global size. 5338 * */ 5339 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5340 { 5341 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5342 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5343 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5344 PetscMPIInt owner; 5345 PetscSFNode *iremote,*oiremote; 5346 const PetscInt *lrowindices; 5347 PetscErrorCode ierr; 5348 PetscSF sf,osf; 5349 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5350 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5351 MPI_Comm comm; 5352 ISLocalToGlobalMapping mapping; 5353 5354 PetscFunctionBegin; 5355 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5356 /* plocalsize is the number of roots 5357 * nrows is the number of leaves 5358 * */ 5359 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5360 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5361 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5362 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5363 for (i=0;i<nrows;i++) { 5364 /* Find a remote index and an owner for a row 5365 * The row could be local or remote 5366 * */ 5367 owner = 0; 5368 lidx = 0; 5369 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5370 iremote[i].index = lidx; 5371 iremote[i].rank = owner; 5372 } 5373 /* Create SF to communicate how many nonzero columns for each row */ 5374 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5375 /* SF will figure out the number of nonzero colunms for each row, and their 5376 * offsets 5377 * */ 5378 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5379 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5380 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5381 5382 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5383 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5384 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5385 roffsets[0] = 0; 5386 roffsets[1] = 0; 5387 for (i=0;i<plocalsize;i++) { 5388 /* diag */ 5389 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5390 /* off diag */ 5391 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5392 /* compute offsets so that we relative location for each row */ 5393 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5394 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5395 } 5396 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5397 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5398 /* 'r' 
means root, and 'l' means leaf */ 5399 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5400 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5401 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5402 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5403 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5404 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5405 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5406 dntotalcols = 0; 5407 ontotalcols = 0; 5408 ncol = 0; 5409 for (i=0;i<nrows;i++) { 5410 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5411 ncol = PetscMax(pnnz[i],ncol); 5412 /* diag */ 5413 dntotalcols += nlcols[i*2+0]; 5414 /* off diag */ 5415 ontotalcols += nlcols[i*2+1]; 5416 } 5417 /* We do not need to figure the right number of columns 5418 * since all the calculations will be done by going through the raw data 5419 * */ 5420 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5421 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5422 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5423 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5424 /* diag */ 5425 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5426 /* off diag */ 5427 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5428 /* diag */ 5429 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5430 /* off diag */ 5431 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5432 dntotalcols = 0; 5433 ontotalcols = 0; 5434 ntotalcols = 0; 5435 for (i=0;i<nrows;i++) { 5436 owner = 0; 5437 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5438 /* Set iremote for diag matrix */ 5439 for (j=0;j<nlcols[i*2+0];j++) { 5440 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5441 iremote[dntotalcols].rank = owner; 5442 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5443 ilocal[dntotalcols++] = ntotalcols++; 5444 } 5445 /* off diag */ 5446 for (j=0;j<nlcols[i*2+1];j++) { 5447 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5448 oiremote[ontotalcols].rank = owner; 5449 oilocal[ontotalcols++] = ntotalcols++; 5450 } 5451 } 5452 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5453 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5454 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5455 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5456 /* P serves as roots and P_oth is leaves 5457 * Diag matrix 5458 * */ 5459 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5460 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5461 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5462 5463 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5464 /* Off diag */ 5465 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5466 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5467 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5468 /* We operate on the matrix internal data for saving memory */ 5469 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5470 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5471 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5472 /* Convert to global indices for diag matrix */ 5473 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5474 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5475 /* We want P_oth store global indices */ 5476 ierr = 
ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5477 /* Use memory scalable approach */ 5478 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5479 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5480 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5481 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5482 /* Convert back to local indices */ 5483 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5484 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5485 nout = 0; 5486 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5487 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5488 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5489 /* Exchange values */ 5490 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5491 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5492 /* Stop PETSc from shrinking memory */ 5493 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5494 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5495 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5496 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5497 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5498 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5499 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5500 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5501 PetscFunctionReturn(0); 5502 } 5503 5504 /* 5505 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5506 * This supports MPIAIJ and MAIJ 5507 * */ 5508 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5509 { 5510 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5511 Mat_SeqAIJ *p_oth; 5512 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5513 IS rows,map; 5514 PetscHMapI hamp; 5515 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5516 MPI_Comm comm; 5517 PetscSF sf,osf; 5518 PetscBool has; 5519 PetscErrorCode ierr; 5520 5521 PetscFunctionBegin; 5522 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5523 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5524 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5525 * and then create a submatrix (that often is an overlapping matrix) 5526 * */ 5527 if (reuse==MAT_INITIAL_MATRIX) { 5528 /* Use a hash table to figure out unique keys */ 5529 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5530 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5531 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5532 count = 0; 5533 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5534 for (i=0;i<a->B->cmap->n;i++) { 5535 key = a->garray[i]/dof; 5536 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5537 if (!has) { 5538 mapping[i] = count; 5539 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5540 } else { 5541 /* Current 'i' has the same value the previous step */ 5542 mapping[i] = count-1; 5543 } 5544 } 5545 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5546 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5547 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5548 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5549 off = 0; 5550 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5551 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5552 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5553 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5554 /* In case, the matrix was already created but users want to recreate the matrix */ 5555 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5556 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5557 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5558 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5559 } else if (reuse==MAT_REUSE_MATRIX) { 5560 /* If matrix was already created, we simply update values using SF objects 5561 * that as attached to the matrix ealier. 5562 * */ 5563 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5564 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5565 if (!sf || !osf) { 5566 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5567 } 5568 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5569 /* Update values in place */ 5570 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5571 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5572 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5573 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5574 } else { 5575 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5576 } 5577 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5578 PetscFunctionReturn(0); 5579 } 5580 5581 /*@C 5582 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5583 5584 Collective on Mat 5585 5586 Input Parameters: 5587 + A,B - the matrices in mpiaij format 5588 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5589 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5590 5591 Output Parameter: 5592 + rowb, colb - index sets of rows and columns of B to extract 5593 - B_seq - the sequential matrix generated 5594 5595 Level: developer 5596 5597 @*/ 5598 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5599 { 5600 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5601 PetscErrorCode ierr; 5602 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5603 IS isrowb,iscolb; 5604 Mat *bseq=NULL; 5605 5606 PetscFunctionBegin; 5607 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5608 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5609 } 5610 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5611 5612 if (scall == MAT_INITIAL_MATRIX) { 5613 start = A->cmap->rstart; 5614 cmap = a->garray; 5615 nzA = a->A->cmap->n; 5616 nzB = a->B->cmap->n; 5617 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5618 ncols = 0; 5619 for (i=0; i<nzB; i++) { /* row < local row index */ 5620 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5621 else break; 5622 } 5623 imark = i; 5624 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5625 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5626 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5627 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5628 } else { 5629 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5630 isrowb = *rowb; iscolb = *colb; 5631 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5632 bseq[0] = *B_seq; 5633 } 5634 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5635 *B_seq = bseq[0]; 5636 ierr = PetscFree(bseq);CHKERRQ(ierr); 5637 if (!rowb) { 5638 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5639 } else { 5640 *rowb = isrowb; 5641 } 5642 if (!colb) { 5643 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5644 } else { 5645 *colb = iscolb; 5646 } 5647 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5648 PetscFunctionReturn(0); 5649 } 5650 5651 /* 5652 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5653 of the OFF-DIAGONAL portion of local A 5654 5655 Collective on Mat 5656 5657 Input Parameters: 5658 + A,B - the matrices in mpiaij format 5659 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5660 5661 Output Parameter: 5662 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5663 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5664 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5665 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5666 5667 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5668 for this matrix. This is not desirable.. 
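
   A possible reuse pattern is sketched below (a sketch only, assuming the caller keeps the
   saved arrays between calls and eventually frees them; the variable names are illustrative):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat        B_oth;
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      ...  A and B receive new numerical values with the same nonzero pattern ...
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);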
5669 5670 Level: developer 5671 5672 */ 5673 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5674 { 5675 PetscErrorCode ierr; 5676 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5677 Mat_SeqAIJ *b_oth; 5678 VecScatter ctx; 5679 MPI_Comm comm; 5680 const PetscMPIInt *rprocs,*sprocs; 5681 const PetscInt *srow,*rstarts,*sstarts; 5682 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5683 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5684 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5685 MPI_Request *rwaits = NULL,*swaits = NULL; 5686 MPI_Status rstatus; 5687 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5688 5689 PetscFunctionBegin; 5690 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5691 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5692 5693 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5694 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5695 } 5696 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5697 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5698 5699 if (size == 1) { 5700 startsj_s = NULL; 5701 bufa_ptr = NULL; 5702 *B_oth = NULL; 5703 PetscFunctionReturn(0); 5704 } 5705 5706 ctx = a->Mvctx; 5707 tag = ((PetscObject)ctx)->tag; 5708 5709 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5710 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5711 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5712 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5713 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5714 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5715 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5716 5717 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5718 if (scall == MAT_INITIAL_MATRIX) { 5719 /* i-array */ 5720 /*---------*/ 5721 /* post receives */ 5722 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5723 for (i=0; i<nrecvs; i++) { 5724 rowlen = rvalues + rstarts[i]*rbs; 5725 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5726 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5727 } 5728 5729 /* pack the outgoing message */ 5730 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5731 5732 sstartsj[0] = 0; 5733 rstartsj[0] = 0; 5734 len = 0; /* total length of j or a array to be sent */ 5735 if (nsends) { 5736 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5737 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5738 } 5739 for (i=0; i<nsends; i++) { 5740 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5741 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5742 for (j=0; j<nrows; j++) { 5743 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5744 for (l=0; l<sbs; l++) { 5745 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5746 5747 rowlen[j*sbs+l] = ncols; 5748 5749 len += ncols; 5750 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5751 } 5752 k++; 5753 } 5754 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5755 5756 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5757 } 5758 /* recvs and sends of i-array are completed */ 5759 i = nrecvs; 5760 while (i--) { 5761 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5762 } 5763 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5764 ierr = PetscFree(svalues);CHKERRQ(ierr); 5765 5766 /* allocate buffers for sending j and a arrays */ 5767 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5768 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5769 5770 /* create i-array of B_oth */ 5771 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5772 5773 b_othi[0] = 0; 5774 len = 0; /* total length of j or a array to be received */ 5775 k = 0; 5776 for (i=0; i<nrecvs; i++) { 5777 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5778 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5779 for (j=0; j<nrows; j++) { 5780 b_othi[k+1] = b_othi[k] + rowlen[j]; 5781 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5782 k++; 5783 } 5784 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5785 } 5786 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5787 5788 /* allocate space for j and a arrrays of B_oth */ 5789 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5790 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5791 5792 /* j-array */ 5793 /*---------*/ 5794 /* post receives of j-array */ 5795 for (i=0; i<nrecvs; i++) { 5796 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5797 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5798 } 5799 5800 /* pack the outgoing message j-array */ 5801 if (nsends) k = sstarts[0]; 5802 for (i=0; i<nsends; i++) { 5803 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5804 bufJ = bufj+sstartsj[i]; 5805 for (j=0; j<nrows; j++) { 5806 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5807 for (ll=0; ll<sbs; ll++) { 5808 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5809 for (l=0; l<ncols; l++) { 5810 *bufJ++ = cols[l]; 5811 } 5812 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5813 } 5814 } 5815 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5816 } 5817 5818 /* recvs and sends of j-array are completed */ 5819 i = nrecvs; 5820 while (i--) { 5821 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5822 } 5823 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5824 } else if (scall == MAT_REUSE_MATRIX) { 5825 sstartsj = *startsj_s; 5826 rstartsj = *startsj_r; 5827 bufa = *bufa_ptr; 5828 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5829 b_otha = b_oth->a; 5830 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5831 5832 /* a-array */ 5833 /*---------*/ 5834 /* post receives of a-array */ 5835 for (i=0; i<nrecvs; i++) { 5836 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5837 ierr = 
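    /* the numerical values are received directly into b_otha, the a-array of B_oth, so no separate
       unpacking pass is needed; with MAT_REUSE_MATRIX this overwrites the previous values in place */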
MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5838 } 5839 5840 /* pack the outgoing message a-array */ 5841 if (nsends) k = sstarts[0]; 5842 for (i=0; i<nsends; i++) { 5843 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5844 bufA = bufa+sstartsj[i]; 5845 for (j=0; j<nrows; j++) { 5846 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5847 for (ll=0; ll<sbs; ll++) { 5848 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5849 for (l=0; l<ncols; l++) { 5850 *bufA++ = vals[l]; 5851 } 5852 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5853 } 5854 } 5855 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5856 } 5857 /* recvs and sends of a-array are completed */ 5858 i = nrecvs; 5859 while (i--) { 5860 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5861 } 5862 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5863 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5864 5865 if (scall == MAT_INITIAL_MATRIX) { 5866 /* put together the new matrix */ 5867 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5868 5869 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5870 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5871 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5872 b_oth->free_a = PETSC_TRUE; 5873 b_oth->free_ij = PETSC_TRUE; 5874 b_oth->nonew = 0; 5875 5876 ierr = PetscFree(bufj);CHKERRQ(ierr); 5877 if (!startsj_s || !bufa_ptr) { 5878 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5879 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5880 } else { 5881 *startsj_s = sstartsj; 5882 *startsj_r = rstartsj; 5883 *bufa_ptr = bufa; 5884 } 5885 } 5886 5887 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5888 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5889 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5890 PetscFunctionReturn(0); 5891 } 5892 5893 /*@C 5894 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5895 5896 Not Collective 5897 5898 Input Parameters: 5899 . A - The matrix in mpiaij format 5900 5901 Output Parameter: 5902 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5903 . 
colmap - A map from global column index to local index into lvec 5904 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5905 5906 Level: developer 5907 5908 @*/ 5909 #if defined(PETSC_USE_CTABLE) 5910 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5911 #else 5912 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5913 #endif 5914 { 5915 Mat_MPIAIJ *a; 5916 5917 PetscFunctionBegin; 5918 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5919 PetscValidPointer(lvec, 2); 5920 PetscValidPointer(colmap, 3); 5921 PetscValidPointer(multScatter, 4); 5922 a = (Mat_MPIAIJ*) A->data; 5923 if (lvec) *lvec = a->lvec; 5924 if (colmap) *colmap = a->colmap; 5925 if (multScatter) *multScatter = a->Mvctx; 5926 PetscFunctionReturn(0); 5927 } 5928 5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5932 #if defined(PETSC_HAVE_MKL_SPARSE) 5933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5934 #endif 5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5937 #if defined(PETSC_HAVE_ELEMENTAL) 5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5939 #endif 5940 #if defined(PETSC_HAVE_HYPRE) 5941 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5942 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5943 #endif 5944 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5946 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5947 5948 /* 5949 Computes (B'*A')' since computing B*A directly is untenable 5950 5951 n p p 5952 ( ) ( ) ( ) 5953 m ( A ) * n ( B ) = m ( C ) 5954 ( ) ( ) ( ) 5955 5956 */ 5957 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5958 { 5959 PetscErrorCode ierr; 5960 Mat At,Bt,Ct; 5961 5962 PetscFunctionBegin; 5963 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5964 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5965 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5966 ierr = MatDestroy(&At);CHKERRQ(ierr); 5967 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5968 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5969 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5970 PetscFunctionReturn(0); 5971 } 5972 5973 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5974 { 5975 PetscErrorCode ierr; 5976 PetscInt m=A->rmap->n,n=B->cmap->n; 5977 Mat Cmat; 5978 5979 PetscFunctionBegin; 5980 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5981 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5982 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5983 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5984 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5985 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5986 ierr = 
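  /* the symbolic phase only creates and assembles an empty (zeroed) MPIDENSE product matrix;
     its entries are filled later by MatMatMultNumeric_MPIDense_MPIAIJ() via the transpose-based product */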
MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
   MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix.

   MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  /* register the type-specific implementations that generic Mat operations look up by composed-function name */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6074 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6075 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6076 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6077 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6079 #if defined(PETSC_HAVE_MKL_SPARSE) 6080 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6081 #endif 6082 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6085 #if defined(PETSC_HAVE_ELEMENTAL) 6086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6087 #endif 6088 #if defined(PETSC_HAVE_HYPRE) 6089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6090 #endif 6091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 6094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 6095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 6096 #if defined(PETSC_HAVE_HYPRE) 6097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6098 #endif 6099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 6100 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6101 PetscFunctionReturn(0); 6102 } 6103 6104 /*@C 6105 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6106 and "off-diagonal" part of the matrix in CSR format. 6107 6108 Collective 6109 6110 Input Parameters: 6111 + comm - MPI communicator 6112 . m - number of local rows (Cannot be PETSC_DECIDE) 6113 . n - This value should be the same as the local size used in creating the 6114 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6115 calculated if N is given) For square matrices n is almost always m. 6116 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6117 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6118 . 
i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6119 . j - column indices 6120 . a - matrix values 6121 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6122 . oj - column indices 6123 - oa - matrix values 6124 6125 Output Parameter: 6126 . mat - the matrix 6127 6128 Level: advanced 6129 6130 Notes: 6131 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6132 must free the arrays once the matrix has been destroyed and not before. 6133 6134 The i and j indices are 0 based 6135 6136 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6137 6138 This sets local rows and cannot be used to set off-processor values. 6139 6140 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6141 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6142 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6143 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6144 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6145 communication if it is known that only local entries will be set. 6146 6147 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6148 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6149 @*/ 6150 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6151 { 6152 PetscErrorCode ierr; 6153 Mat_MPIAIJ *maij; 6154 6155 PetscFunctionBegin; 6156 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6157 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6158 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6159 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6160 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6161 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6162 maij = (Mat_MPIAIJ*) (*mat)->data; 6163 6164 (*mat)->preallocated = PETSC_TRUE; 6165 6166 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6167 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6168 6169 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6170 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6171 6172 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6173 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6174 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6175 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6176 6177 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6178 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6179 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6180 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6181 ierr = 
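  /* the user's split arrays cannot be reallocated to hold additional nonzeros, so make any attempt
     to introduce a new nonzero location an error */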
MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6182 PetscFunctionReturn(0); 6183 } 6184 6185 /* 6186 Special version for direct calls from Fortran 6187 */ 6188 #include <petsc/private/fortranimpl.h> 6189 6190 /* Change these macros so can be used in void function */ 6191 #undef CHKERRQ 6192 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6193 #undef SETERRQ2 6194 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6195 #undef SETERRQ3 6196 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6197 #undef SETERRQ 6198 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6199 6200 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6201 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6202 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6203 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6204 #else 6205 #endif 6206 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6207 { 6208 Mat mat = *mmat; 6209 PetscInt m = *mm, n = *mn; 6210 InsertMode addv = *maddv; 6211 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6212 PetscScalar value; 6213 PetscErrorCode ierr; 6214 6215 MatCheckPreallocated(mat,1); 6216 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6217 6218 #if defined(PETSC_USE_DEBUG) 6219 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6220 #endif 6221 { 6222 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6223 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6224 PetscBool roworiented = aij->roworiented; 6225 6226 /* Some Variables required in the macro */ 6227 Mat A = aij->A; 6228 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6229 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6230 MatScalar *aa = a->a; 6231 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6232 Mat B = aij->B; 6233 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6234 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6235 MatScalar *ba = b->a; 6236 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6237 * cannot use "#if defined" inside a macro. 
*/ 6238 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6239 6240 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6241 PetscInt nonew = a->nonew; 6242 MatScalar *ap1,*ap2; 6243 6244 PetscFunctionBegin; 6245 for (i=0; i<m; i++) { 6246 if (im[i] < 0) continue; 6247 #if defined(PETSC_USE_DEBUG) 6248 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6249 #endif 6250 if (im[i] >= rstart && im[i] < rend) { 6251 row = im[i] - rstart; 6252 lastcol1 = -1; 6253 rp1 = aj + ai[row]; 6254 ap1 = aa + ai[row]; 6255 rmax1 = aimax[row]; 6256 nrow1 = ailen[row]; 6257 low1 = 0; 6258 high1 = nrow1; 6259 lastcol2 = -1; 6260 rp2 = bj + bi[row]; 6261 ap2 = ba + bi[row]; 6262 rmax2 = bimax[row]; 6263 nrow2 = bilen[row]; 6264 low2 = 0; 6265 high2 = nrow2; 6266 6267 for (j=0; j<n; j++) { 6268 if (roworiented) value = v[i*n+j]; 6269 else value = v[i+j*m]; 6270 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6271 if (in[j] >= cstart && in[j] < cend) { 6272 col = in[j] - cstart; 6273 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6275 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6276 #endif 6277 } else if (in[j] < 0) continue; 6278 #if defined(PETSC_USE_DEBUG) 6279 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6280 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 6281 #endif 6282 else { 6283 if (mat->was_assembled) { 6284 if (!aij->colmap) { 6285 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6286 } 6287 #if defined(PETSC_USE_CTABLE) 6288 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6289 col--; 6290 #else 6291 col = aij->colmap[in[j]] - 1; 6292 #endif 6293 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6294 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6295 col = in[j]; 6296 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6297 B = aij->B; 6298 b = (Mat_SeqAIJ*)B->data; 6299 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6300 rp2 = bj + bi[row]; 6301 ap2 = ba + bi[row]; 6302 rmax2 = bimax[row]; 6303 nrow2 = bilen[row]; 6304 low2 = 0; 6305 high2 = nrow2; 6306 bm = aij->B->rmap->n; 6307 ba = b->a; 6308 inserted = PETSC_FALSE; 6309 } 6310 } else col = in[j]; 6311 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6312 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6313 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6314 #endif 6315 } 6316 } 6317 } else if (!aij->donotstash) { 6318 if (roworiented) { 6319 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6320 } else { 6321 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6322 } 6323 } 6324 } 6325 } 6326 PetscFunctionReturnVoid(); 6327 } 6328
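
/*
   The function below is NOT part of the PETSc API; it is an illustrative sketch, added editorially, of how
   MatCreateMPIAIJWithSplitArrays() documented above can be driven. It assumes exactly two MPI processes and
   assembles the 4x4 tridiagonal matrix with 2 on the diagonal and -1 on the off-diagonals; the function name
   and all array names are hypothetical. Note that it sits below the CHKERRQ/SETERRQ redefinitions used for the
   Fortran stub, so failures abort rather than return an error code.
*/
PetscErrorCode MatCreateMPIAIJWithSplitArraysExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode     ierr;
  PetscMPIInt        rank,size;
  /* "diagonal" block: each process owns a 2x2 tridiagonal piece, stored with LOCAL column indices */
  static PetscInt    di[3] = {0,2,4},dj[4] = {0,1,0,1};
  static PetscScalar da[4] = {2.0,-1.0,-1.0,2.0};
  /* "off-diagonal" block: one coupling entry per process, stored with GLOBAL column indices */
  static PetscInt    oi0[3] = {0,0,1},oj0[1] = {2}; /* rank 0: local row 1 couples to global column 2 */
  static PetscInt    oi1[3] = {0,1,1},oj1[1] = {1}; /* rank 1: local row 0 couples to global column 1 */
  static PetscScalar oa[1]  = {-1.0};

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (size != 2) SETERRQ(comm,PETSC_ERR_SUP,"This illustration assumes exactly two MPI processes");
  ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,rank ? oi1 : oi0,rank ? oj1 : oj0,oa,A);CHKERRQ(ierr);
  /* the arrays are static, so they remain valid for the life of *A as required by the manual page above */
  PetscFunctionReturn(0);
}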