#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
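/*
   Example (editor's sketch, not part of the original source): creating an AIJ matrix and
   calling both preallocation routines as recommended above, so the same code works on one
   or many processes.  The nonzero counts (5 per diagonal row, 2 per off-diagonal row) are
   illustrative assumptions only.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);        |* used on one process  *|
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr); |* used on many processes *|
*/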
static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->pinnedtocpu = flg;
#endif
  if (a->A) {
    ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
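/*
   Example (editor's sketch): locating the rows of a parallel matrix that contain at least
   one stored nonzero, using MatFindNonzeroRows() whose MPIAIJ implementation appears above.

     IS keptrows;

     ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
     ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
*/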
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
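/*
   Example (editor's sketch): computing all global column norms of a parallel matrix via
   MatGetColumnNorms() implemented above; the result array must have room for the global
   number of columns on every process.

     PetscReal *norms;
     PetscInt  N;

     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/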
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processes */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* count the number of diagonal and off-diagonal entries in each row */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* count the number of diagonal and off-diagonal entries in each row */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
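/*
   Worked example (editor's note): the ownership computation in MatDistribute_MPIAIJ() above
   turns per-process row counts into global offsets.  With three processes owning m = 2, 3
   and 4 rows, MPI_Allgather fills rowners[1..3] = {2,3,4}; the prefix sum then gives
   rowners = {0,2,5,9}, so e.g. rank 1 owns global rows [2,5).
*/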
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
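/*
   Example (editor's sketch): garray and colmap are inverse maps.  If the off-diagonal block
   references global columns {3,17,42}, then garray = {3,17,42} maps local -> global, and the
   colmap lookup below returns the local index + 1, with 0 meaning "not present" (hence the
   shift by one in both branches):

     PetscInt gcol = 17,lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                           |* now lcol == 1 *|
   #else
     lcol = aij->colmap[gcol] - 1;     |* now lcol == 1 *|
   #endif
*/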
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops() will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        inserted = PETSC_TRUE; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
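/*
   Example (editor's sketch): the user-level insertion that MatSetValues_MPIAIJ() below
   services.  Locally owned entries go straight into the A/B blocks through the macros
   above; entries for off-process rows are stashed until assembly.  The indices and values
   are illustrative assumptions.

     PetscInt    row = 0,cols[3] = {0,1,2};
     PetscScalar vals[3] = {2.0,-1.0,-1.0};

     ierr = MatSetValues(A,1,&row,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     |* ... more insertions, then MatAssemblyBegin()/MatAssemblyEnd() must follow ... *|
*/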
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value  = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}
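/*
   Worked example (editor's note): how a CSR row is split by the ownership range [cstart,cend)
   in the routine above.  With cstart = 10, cend = 20 and a row containing global columns
   {3,11,15,27}, columns 11 and 15 land in the diagonal block shifted to local indices {1,5},
   while columns 3 and 27 land in the off-diagonal block and keep their global indices until
   assembly compresses them.
*/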
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
    Also, mat->was_assembled has to be PETSC_FALSE, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  Mat         A     = aij->A; /* diagonal part of the matrix */
  Mat         B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
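/*
   Example (editor's sketch): the assembly protocol served by MatAssemblyBegin_MPIAIJ() above
   and MatAssemblyEnd_MPIAIJ() below.  Off-process entries set with MatSetValues() sit in the
   stash until the Begin/End pair scatters them to their owners; note MatGetValues() (above)
   only supports locally owned rows, hence the ownership-range query.

     PetscInt    rstart,rend;
     PetscScalar one = 1.0;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&rstart,1,&rstart,&one,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  |* launches stash scatter *|
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    |* drains it and assembles *|
*/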
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
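/*
   Example (editor's sketch): zeroing rows of an assembled parallel matrix with
   MatZeroRows(), implemented for MPIAIJ below.  Passing diag = 1.0 keeps a unit diagonal,
   and passing the solution/rhs pair fixes b so that Dirichlet values in x are preserved.
   The row indices are illustrative assumptions.

     PetscInt rows[2] = {0,1};

     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/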
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ*)A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch  = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
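/*
   Example (editor's sketch): MatZeroRowsColumns() additionally zeroes the matching columns,
   which for MPIAIJ requires communicating a zeroed-column mask to the off-diagonal blocks
   (see the VecScatter of lmask in the implementation below).  The row index is an
   illustrative assumption.

     PetscInt rows[1] = {0};

     ierr = MatZeroRowsColumns(A,1,rows,1.0,x,b);CHKERRQ(ierr);
*/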
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
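/*
   Editor's note: the multiply kernels above all follow the same overlap pattern,
   y = A_diag x_local + A_offdiag x_ghost, with the ghost update hidden behind local work.
   Annotated distillation of MatMult_MPIAIJ():

     ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); |* start ghost update    *|
     ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);                                  |* overlap local product *|
     ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);   |* finish ghost update   *|
     ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);                       |* add off-diagonal part *|
*/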
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
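/*
   Example (editor's sketch): testing a parallel matrix for symmetry within a tolerance;
   for MPIAIJ this dispatches to MatIsTranspose_MPIAIJ(A,A,tol,&flg) defined above.

     PetscBool flg;

     ierr = MatIsSymmetric(A,1.0e-12,&flg);CHKERRQ(ierr);
*/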
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
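/*
   Example (editor's sketch): storing a parallel matrix with the binary viewer, which for a
   multi-process MPIAIJ matrix is driven by MatView_MPIAIJ_Binary() below, then reading it
   back.  "matrix.dat" is an illustrative file name.

     PetscViewer viewer;
     Mat         B;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/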
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the root process needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
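/*
   Example (editor's sketch): viewing load-balance information in ASCII, handled by
   MatView_MPIAIJ_ASCIIorDraworSocket() below; the same output is reachable from the
   options database via -mat_view.

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_LOAD_BALANCE);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/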
PetscMin(nmin,nz[i]); 1457 navg += nz[i]; 1458 } 1459 ierr = PetscFree(nz);CHKERRQ(ierr); 1460 navg = navg/size; 1461 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1462 PetscFunctionReturn(0); 1463 } 1464 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1465 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1466 MatInfo info; 1467 PetscBool inodes; 1468 1469 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1470 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1471 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1472 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1473 if (!inodes) { 1474 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1475 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1476 } else { 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1478 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1479 } 1480 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1481 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1482 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1483 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1484 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1485 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1486 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1487 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1488 PetscFunctionReturn(0); 1489 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1490 PetscInt inodecount,inodelimit,*inodes; 1491 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1492 if (inodes) { 1493 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1494 } else { 1495 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1496 } 1497 PetscFunctionReturn(0); 1498 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1499 PetscFunctionReturn(0); 1500 } 1501 } else if (isbinary) { 1502 if (size == 1) { 1503 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1504 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1505 } else { 1506 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } else if (iascii && size == 1) { 1510 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1511 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } else if (isdraw) { 1514 PetscDraw draw; 1515 PetscBool isnull; 1516 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1517 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1518 if (isnull) PetscFunctionReturn(0); 1519 } 1520 1521 { /* assemble the entire matrix onto first processor */ 1522 Mat A = NULL, Av; 1523 IS isrow,iscol; 1524 1525 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1526 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1527 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1528 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1529 /* The commented code uses MatCreateSubMatrices instead */ 1530 /* 1531 Mat *AA, A = NULL, Av; 1532 IS isrow,iscol; 1533 1534 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1535 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1536 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1537 if (!rank) { 1538 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1539 A = AA[0]; 1540 Av = AA[0]; 1541 } 1542 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1543 */ 1544 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1545 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1546 /* 1547 Everyone has to call to draw the matrix since the graphics waits are 1548 synchronized across all processors that share the PetscDraw object 1549 */ 1550 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1551 if (!rank) { 1552 if (((PetscObject)mat)->name) { 1553 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1554 } 1555 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1556 } 1557 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1558 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1559 ierr = MatDestroy(&A);CHKERRQ(ierr); 1560 } 1561 PetscFunctionReturn(0); 1562 } 1563 1564 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1565 { 1566 PetscErrorCode ierr; 1567 PetscBool iascii,isdraw,issocket,isbinary; 1568 1569 PetscFunctionBegin; 1570 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1571 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1572 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1573 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1574 if (iascii || isdraw || isbinary || issocket) { 1575 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1581 { 1582 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1583 PetscErrorCode ierr; 1584 Vec bb1 = 0; 1585 PetscBool hasop; 1586 1587 PetscFunctionBegin; 1588 if (flag == SOR_APPLY_UPPER) { 1589 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1590 PetscFunctionReturn(0); 1591 } 1592 1593 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1594 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1595 } 1596 1597 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1598 if (flag & SOR_ZERO_INITIAL_GUESS) { 1599 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1600 its--; 1601 } 1602 1603 while (its--) { 1604 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 1607 /* update rhs: bb1 = bb - B*x */ 1608 ierr = 
VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1609 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1610 1611 /* local sweep */ 1612 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1613 } 1614 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1615 if (flag & SOR_ZERO_INITIAL_GUESS) { 1616 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1617 its--; 1618 } 1619 while (its--) { 1620 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1621 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1622 1623 /* update rhs: bb1 = bb - B*x */ 1624 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1625 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1626 1627 /* local sweep */ 1628 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1629 } 1630 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1631 if (flag & SOR_ZERO_INITIAL_GUESS) { 1632 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1633 its--; 1634 } 1635 while (its--) { 1636 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1637 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1638 1639 /* update rhs: bb1 = bb - B*x */ 1640 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1641 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1642 1643 /* local sweep */ 1644 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1645 } 1646 } else if (flag & SOR_EISENSTAT) { 1647 Vec xx1; 1648 1649 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1650 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1651 1652 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1653 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1654 if (!mat->diag) { 1655 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1656 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1657 } 1658 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1659 if (hasop) { 1660 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1661 } else { 1662 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1663 } 1664 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1665 1666 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1667 1668 /* local sweep */ 1669 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1670 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1671 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1672 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1673 1674 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1675 1676 matin->factorerrortype = mat->A->factorerrortype; 1677 PetscFunctionReturn(0); 1678 } 1679 1680 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1681 { 1682 Mat aA,aB,Aperm; 1683 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1684 PetscScalar *aa,*ba; 1685 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1686 PetscSF rowsf,sf; 1687 IS parcolp = NULL; 1688 PetscBool done; 1689 PetscErrorCode ierr; 
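
  /* Added commentary, summarizing the steps performed below: rowp/colp list, for each
     locally owned row/column, the global position it should move to. The routine
     (1) inverts the row permutation with a PetscSF reduction so each process learns the
     destination of its rows (rdest), (2) does the same for the columns (cdest),
     (3) broadcasts the column destinations of the ghost columns listed in garray (gcdest),
     (4) counts diagonal and off-diagonal nonzeros per destination row so the permuted
     matrix can be fully preallocated, and (5) copies the entries over with MatSetValues(). */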
1690 1691 PetscFunctionBegin; 1692 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1693 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1694 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1695 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1696 1697 /* Invert row permutation to find out where my rows should go */ 1698 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1699 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1700 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1701 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1702 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1703 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1704 1705 /* Invert column permutation to find out where my columns should go */ 1706 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1707 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1708 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1709 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1710 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1711 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1712 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1713 1714 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1715 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1716 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1717 1718 /* Find out where my gcols should go */ 1719 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1720 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1721 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1722 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1723 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1724 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1725 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1726 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1727 1728 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1729 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1730 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1731 for (i=0; i<m; i++) { 1732 PetscInt row = rdest[i],rowner; 1733 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1734 for (j=ai[i]; j<ai[i+1]; j++) { 1735 PetscInt cowner,col = cdest[aj[j]]; 1736 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1737 if (rowner == cowner) dnnz[i]++; 1738 else onnz[i]++; 1739 } 1740 for (j=bi[i]; j<bi[i+1]; j++) { 1741 PetscInt cowner,col = gcdest[bj[j]]; 1742 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1743 if (rowner == cowner) dnnz[i]++; 1744 else onnz[i]++; 1745 } 1746 } 1747 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1748 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1749 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1750 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1751 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1752 1753 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1754 ierr = 
MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1755 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1756 for (i=0; i<m; i++) { 1757 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1758 PetscInt j0,rowlen; 1759 rowlen = ai[i+1] - ai[i]; 1760 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1761 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1762 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1763 } 1764 rowlen = bi[i+1] - bi[i]; 1765 for (j0=j=0; j<rowlen; j0=j) { 1766 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1767 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1768 } 1769 } 1770 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1771 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1772 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1773 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1774 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1775 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1776 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1777 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1778 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1779 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1780 *B = Aperm; 1781 PetscFunctionReturn(0); 1782 } 1783 1784 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1785 { 1786 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1787 PetscErrorCode ierr; 1788 1789 PetscFunctionBegin; 1790 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1791 if (ghosts) *ghosts = aij->garray; 1792 PetscFunctionReturn(0); 1793 } 1794 1795 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1796 { 1797 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1798 Mat A = mat->A,B = mat->B; 1799 PetscErrorCode ierr; 1800 PetscLogDouble isend[5],irecv[5]; 1801 1802 PetscFunctionBegin; 1803 info->block_size = 1.0; 1804 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1805 1806 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1807 isend[3] = info->memory; isend[4] = info->mallocs; 1808 1809 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1810 1811 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1812 isend[3] += info->memory; isend[4] += info->mallocs; 1813 if (flag == MAT_LOCAL) { 1814 info->nz_used = isend[0]; 1815 info->nz_allocated = isend[1]; 1816 info->nz_unneeded = isend[2]; 1817 info->memory = isend[3]; 1818 info->mallocs = isend[4]; 1819 } else if (flag == MAT_GLOBAL_MAX) { 1820 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1821 1822 info->nz_used = irecv[0]; 1823 info->nz_allocated = irecv[1]; 1824 info->nz_unneeded = irecv[2]; 1825 info->memory = irecv[3]; 1826 info->mallocs = irecv[4]; 1827 } else if (flag == MAT_GLOBAL_SUM) { 1828 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1829 1830 info->nz_used = irecv[0]; 1831 info->nz_allocated = irecv[1]; 1832 info->nz_unneeded = irecv[2]; 1833 info->memory = irecv[3]; 1834 info->mallocs = irecv[4]; 1835 } 1836 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1837 info->fill_ratio_needed = 0; 1838 
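 /* likewise, there are no factorization mallocs to report for an unfactored parallel matrix */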
info->factor_mallocs = 0; 1839 PetscFunctionReturn(0); 1840 } 1841 1842 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1843 { 1844 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1845 PetscErrorCode ierr; 1846 1847 PetscFunctionBegin; 1848 switch (op) { 1849 case MAT_NEW_NONZERO_LOCATIONS: 1850 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1851 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1852 case MAT_KEEP_NONZERO_PATTERN: 1853 case MAT_NEW_NONZERO_LOCATION_ERR: 1854 case MAT_USE_INODES: 1855 case MAT_IGNORE_ZERO_ENTRIES: 1856 MatCheckPreallocated(A,1); 1857 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1858 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1859 break; 1860 case MAT_ROW_ORIENTED: 1861 MatCheckPreallocated(A,1); 1862 a->roworiented = flg; 1863 1864 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1865 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1866 break; 1867 case MAT_NEW_DIAGONALS: 1868 case MAT_SORTED_FULL: 1869 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1870 break; 1871 case MAT_IGNORE_OFF_PROC_ENTRIES: 1872 a->donotstash = flg; 1873 break; 1874 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1875 case MAT_SPD: 1876 case MAT_SYMMETRIC: 1877 case MAT_STRUCTURALLY_SYMMETRIC: 1878 case MAT_HERMITIAN: 1879 case MAT_SYMMETRY_ETERNAL: 1880 break; 1881 case MAT_SUBMAT_SINGLEIS: 1882 A->submat_singleis = flg; 1883 break; 1884 case MAT_STRUCTURE_ONLY: 1885 /* The option is handled directly by MatSetOption() */ 1886 break; 1887 default: 1888 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1889 } 1890 PetscFunctionReturn(0); 1891 } 1892 1893 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1894 { 1895 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1896 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1897 PetscErrorCode ierr; 1898 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1899 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1900 PetscInt *cmap,*idx_p; 1901 1902 PetscFunctionBegin; 1903 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1904 mat->getrowactive = PETSC_TRUE; 1905 1906 if (!mat->rowvalues && (idx || v)) { 1907 /* 1908 allocate enough space to hold information from the longest row. 
1909 */ 1910 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1911 PetscInt max = 1,tmp; 1912 for (i=0; i<matin->rmap->n; i++) { 1913 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1914 if (max < tmp) max = tmp; 1915 } 1916 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1917 } 1918 1919 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1920 lrow = row - rstart; 1921 1922 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1923 if (!v) {pvA = 0; pvB = 0;} 1924 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1925 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1926 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1927 nztot = nzA + nzB; 1928 1929 cmap = mat->garray; 1930 if (v || idx) { 1931 if (nztot) { 1932 /* Sort by increasing column numbers, assuming A and B already sorted */ 1933 PetscInt imark = -1; 1934 if (v) { 1935 *v = v_p = mat->rowvalues; 1936 for (i=0; i<nzB; i++) { 1937 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1938 else break; 1939 } 1940 imark = i; 1941 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1942 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1943 } 1944 if (idx) { 1945 *idx = idx_p = mat->rowindices; 1946 if (imark > -1) { 1947 for (i=0; i<imark; i++) { 1948 idx_p[i] = cmap[cworkB[i]]; 1949 } 1950 } else { 1951 for (i=0; i<nzB; i++) { 1952 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1953 else break; 1954 } 1955 imark = i; 1956 } 1957 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1958 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1959 } 1960 } else { 1961 if (idx) *idx = 0; 1962 if (v) *v = 0; 1963 } 1964 } 1965 *nz = nztot; 1966 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1967 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1968 PetscFunctionReturn(0); 1969 } 1970 1971 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1972 { 1973 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1974 1975 PetscFunctionBegin; 1976 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1977 aij->getrowactive = PETSC_FALSE; 1978 PetscFunctionReturn(0); 1979 } 1980 1981 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1982 { 1983 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1984 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1985 PetscErrorCode ierr; 1986 PetscInt i,j,cstart = mat->cmap->rstart; 1987 PetscReal sum = 0.0; 1988 MatScalar *v; 1989 1990 PetscFunctionBegin; 1991 if (aij->size == 1) { 1992 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1993 } else { 1994 if (type == NORM_FROBENIUS) { 1995 v = amat->a; 1996 for (i=0; i<amat->nz; i++) { 1997 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1998 } 1999 v = bmat->a; 2000 for (i=0; i<bmat->nz; i++) { 2001 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2002 } 2003 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2004 *norm = PetscSqrtReal(*norm); 2005 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 2006 } else if (type == NORM_1) { /* max column norm */ 2007 PetscReal *tmp,*tmp2; 2008 PetscInt *jj,*garray = aij->garray; 2009 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 2010 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 2011 *norm = 0.0; 
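    /* accumulate |a_ij| into per-column sums: the diagonal block stores local column
       indices (offset by cstart), while the off-diagonal block's compressed indices are
       mapped to global columns through garray; the Allreduce below then combines the
       partial sums into the true column sums before taking the maximum */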
2012 v = amat->a; jj = amat->j; 2013 for (j=0; j<amat->nz; j++) { 2014 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2015 } 2016 v = bmat->a; jj = bmat->j; 2017 for (j=0; j<bmat->nz; j++) { 2018 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2019 } 2020 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2021 for (j=0; j<mat->cmap->N; j++) { 2022 if (tmp2[j] > *norm) *norm = tmp2[j]; 2023 } 2024 ierr = PetscFree(tmp);CHKERRQ(ierr); 2025 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2026 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2027 } else if (type == NORM_INFINITY) { /* max row norm */ 2028 PetscReal ntemp = 0.0; 2029 for (j=0; j<aij->A->rmap->n; j++) { 2030 v = amat->a + amat->i[j]; 2031 sum = 0.0; 2032 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2033 sum += PetscAbsScalar(*v); v++; 2034 } 2035 v = bmat->a + bmat->i[j]; 2036 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2037 sum += PetscAbsScalar(*v); v++; 2038 } 2039 if (sum > ntemp) ntemp = sum; 2040 } 2041 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2042 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2043 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2044 } 2045 PetscFunctionReturn(0); 2046 } 2047 2048 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2049 { 2050 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2051 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2052 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2053 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2054 PetscErrorCode ierr; 2055 Mat B,A_diag,*B_diag; 2056 const MatScalar *array; 2057 2058 PetscFunctionBegin; 2059 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2060 ai = Aloc->i; aj = Aloc->j; 2061 bi = Bloc->i; bj = Bloc->j; 2062 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2063 PetscInt *d_nnz,*g_nnz,*o_nnz; 2064 PetscSFNode *oloc; 2065 PETSC_UNUSED PetscSF sf; 2066 2067 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2068 /* compute d_nnz for preallocation */ 2069 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2070 for (i=0; i<ai[ma]; i++) { 2071 d_nnz[aj[i]]++; 2072 } 2073 /* compute local off-diagonal contributions */ 2074 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2075 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2076 /* map those to global */ 2077 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2078 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2079 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2080 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2081 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2082 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2083 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2084 2085 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2086 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2087 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2088 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2089 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2090 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2091 } else { 2092 B = *matout; 2093 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2094 } 2095 2096 b = (Mat_MPIAIJ*)B->data; 2097 A_diag = a->A; 2098 B_diag = &b->A; 2099 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2100 A_diag_ncol = A_diag->cmap->N; 2101 B_diag_ilen = sub_B_diag->ilen; 2102 B_diag_i = sub_B_diag->i; 2103 2104 /* Set ilen for diagonal of B */ 2105 for (i=0; i<A_diag_ncol; i++) { 2106 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2107 } 2108 2109 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2110 very quickly (=without using MatSetValues), because all writes are local. */ 2111 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2112 2113 /* copy over the B part */ 2114 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2115 array = Bloc->a; 2116 row = A->rmap->rstart; 2117 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2118 cols_tmp = cols; 2119 for (i=0; i<mb; i++) { 2120 ncol = bi[i+1]-bi[i]; 2121 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2122 row++; 2123 array += ncol; cols_tmp += ncol; 2124 } 2125 ierr = PetscFree(cols);CHKERRQ(ierr); 2126 2127 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2128 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2129 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2130 *matout = B; 2131 } else { 2132 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2133 } 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2138 { 2139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2140 Mat a = aij->A,b = aij->B; 2141 PetscErrorCode ierr; 2142 PetscInt s1,s2,s3; 2143 2144 PetscFunctionBegin; 2145 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2146 if (rr) { 2147 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2148 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2149 /* Overlap communication with computation. 
*/ 2150 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2151 } 2152 if (ll) { 2153 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2154 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2155 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2156 } 2157 /* scale the diagonal block */ 2158 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2159 2160 if (rr) { 2161 /* Do a scatter end and then right scale the off-diagonal block */ 2162 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2163 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2169 { 2170 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2171 PetscErrorCode ierr; 2172 2173 PetscFunctionBegin; 2174 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2175 PetscFunctionReturn(0); 2176 } 2177 2178 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2179 { 2180 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2181 Mat a,b,c,d; 2182 PetscBool flg; 2183 PetscErrorCode ierr; 2184 2185 PetscFunctionBegin; 2186 a = matA->A; b = matA->B; 2187 c = matB->A; d = matB->B; 2188 2189 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2190 if (flg) { 2191 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2192 } 2193 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2194 PetscFunctionReturn(0); 2195 } 2196 2197 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2198 { 2199 PetscErrorCode ierr; 2200 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2201 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2202 2203 PetscFunctionBegin; 2204 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2205 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2206 /* because of the column compression in the off-processor part of the matrix a->B, 2207 the number of columns in a->B and b->B may be different, hence we cannot call 2208 the MatCopy() directly on the two parts. If need be, we can provide a more 2209 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2210 then copying the submatrices */ 2211 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2212 } else { 2213 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2214 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2215 } 2216 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2217 PetscFunctionReturn(0); 2218 } 2219 2220 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2221 { 2222 PetscErrorCode ierr; 2223 2224 PetscFunctionBegin; 2225 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2226 PetscFunctionReturn(0); 2227 } 2228 2229 /* 2230 Computes the number of nonzeros per row needed for preallocation when X and Y 2231 have different nonzero structure. 
2232 */ 2233 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2234 { 2235 PetscInt i,j,k,nzx,nzy; 2236 2237 PetscFunctionBegin; 2238 /* Set the number of nonzeros in the new matrix */ 2239 for (i=0; i<m; i++) { 2240 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2241 nzx = xi[i+1] - xi[i]; 2242 nzy = yi[i+1] - yi[i]; 2243 nnz[i] = 0; 2244 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2245 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2246 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2247 nnz[i]++; 2248 } 2249 for (; k<nzy; k++) nnz[i]++; 2250 } 2251 PetscFunctionReturn(0); 2252 } 2253 2254 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2255 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2256 { 2257 PetscErrorCode ierr; 2258 PetscInt m = Y->rmap->N; 2259 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2260 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2261 2262 PetscFunctionBegin; 2263 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2268 { 2269 PetscErrorCode ierr; 2270 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2271 PetscBLASInt bnz,one=1; 2272 Mat_SeqAIJ *x,*y; 2273 2274 PetscFunctionBegin; 2275 if (str == SAME_NONZERO_PATTERN) { 2276 PetscScalar alpha = a; 2277 x = (Mat_SeqAIJ*)xx->A->data; 2278 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2279 y = (Mat_SeqAIJ*)yy->A->data; 2280 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2281 x = (Mat_SeqAIJ*)xx->B->data; 2282 y = (Mat_SeqAIJ*)yy->B->data; 2283 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2284 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2285 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2286 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2287 will be updated */ 2288 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2289 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2290 Y->offloadmask = PETSC_OFFLOAD_CPU; 2291 } 2292 #endif 2293 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2294 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2295 } else { 2296 Mat B; 2297 PetscInt *nnz_d,*nnz_o; 2298 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2299 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2300 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2301 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2302 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2303 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2304 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2305 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2306 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2307 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2308 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2309 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2310 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* v has one entry per local row, so loop over the row map, not the column map */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  /* the work vectors are sequential, so they must live on PETSC_COMM_SELF, as in MatGetRowMax_MPIAIJ() */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  if (x->assembled) {
    ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  } else {
    ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (the default is to use the non-scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij  = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
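    /* MatMissingDiagonal() on the sequential diagonal block returns a local row index;
       shift it by the ownership-range start so the caller receives a global index */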
2623 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2624 *d += rstart; 2625 2626 } 2627 PetscFunctionReturn(0); 2628 } 2629 2630 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2631 { 2632 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2633 PetscErrorCode ierr; 2634 2635 PetscFunctionBegin; 2636 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2637 PetscFunctionReturn(0); 2638 } 2639 2640 /* -------------------------------------------------------------------*/ 2641 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2642 MatGetRow_MPIAIJ, 2643 MatRestoreRow_MPIAIJ, 2644 MatMult_MPIAIJ, 2645 /* 4*/ MatMultAdd_MPIAIJ, 2646 MatMultTranspose_MPIAIJ, 2647 MatMultTransposeAdd_MPIAIJ, 2648 0, 2649 0, 2650 0, 2651 /*10*/ 0, 2652 0, 2653 0, 2654 MatSOR_MPIAIJ, 2655 MatTranspose_MPIAIJ, 2656 /*15*/ MatGetInfo_MPIAIJ, 2657 MatEqual_MPIAIJ, 2658 MatGetDiagonal_MPIAIJ, 2659 MatDiagonalScale_MPIAIJ, 2660 MatNorm_MPIAIJ, 2661 /*20*/ MatAssemblyBegin_MPIAIJ, 2662 MatAssemblyEnd_MPIAIJ, 2663 MatSetOption_MPIAIJ, 2664 MatZeroEntries_MPIAIJ, 2665 /*24*/ MatZeroRows_MPIAIJ, 2666 0, 2667 0, 2668 0, 2669 0, 2670 /*29*/ MatSetUp_MPIAIJ, 2671 0, 2672 0, 2673 MatGetDiagonalBlock_MPIAIJ, 2674 0, 2675 /*34*/ MatDuplicate_MPIAIJ, 2676 0, 2677 0, 2678 0, 2679 0, 2680 /*39*/ MatAXPY_MPIAIJ, 2681 MatCreateSubMatrices_MPIAIJ, 2682 MatIncreaseOverlap_MPIAIJ, 2683 MatGetValues_MPIAIJ, 2684 MatCopy_MPIAIJ, 2685 /*44*/ MatGetRowMax_MPIAIJ, 2686 MatScale_MPIAIJ, 2687 MatShift_MPIAIJ, 2688 MatDiagonalSet_MPIAIJ, 2689 MatZeroRowsColumns_MPIAIJ, 2690 /*49*/ MatSetRandom_MPIAIJ, 2691 0, 2692 0, 2693 0, 2694 0, 2695 /*54*/ MatFDColoringCreate_MPIXAIJ, 2696 0, 2697 MatSetUnfactored_MPIAIJ, 2698 MatPermute_MPIAIJ, 2699 0, 2700 /*59*/ MatCreateSubMatrix_MPIAIJ, 2701 MatDestroy_MPIAIJ, 2702 MatView_MPIAIJ, 2703 0, 2704 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2705 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2706 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2707 0, 2708 0, 2709 0, 2710 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2711 MatGetRowMinAbs_MPIAIJ, 2712 0, 2713 0, 2714 0, 2715 0, 2716 /*75*/ MatFDColoringApply_AIJ, 2717 MatSetFromOptions_MPIAIJ, 2718 0, 2719 0, 2720 MatFindZeroDiagonals_MPIAIJ, 2721 /*80*/ 0, 2722 0, 2723 0, 2724 /*83*/ MatLoad_MPIAIJ, 2725 MatIsSymmetric_MPIAIJ, 2726 0, 2727 0, 2728 0, 2729 0, 2730 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2731 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2732 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2733 MatPtAP_MPIAIJ_MPIAIJ, 2734 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2735 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2736 0, 2737 0, 2738 0, 2739 MatPinToCPU_MPIAIJ, 2740 /*99*/ 0, 2741 0, 2742 0, 2743 MatConjugate_MPIAIJ, 2744 0, 2745 /*104*/MatSetValuesRow_MPIAIJ, 2746 MatRealPart_MPIAIJ, 2747 MatImaginaryPart_MPIAIJ, 2748 0, 2749 0, 2750 /*109*/0, 2751 0, 2752 MatGetRowMin_MPIAIJ, 2753 0, 2754 MatMissingDiagonal_MPIAIJ, 2755 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2756 0, 2757 MatGetGhosts_MPIAIJ, 2758 0, 2759 0, 2760 /*119*/0, 2761 0, 2762 0, 2763 0, 2764 MatGetMultiProcBlock_MPIAIJ, 2765 /*124*/MatFindNonzeroRows_MPIAIJ, 2766 MatGetColumnNorms_MPIAIJ, 2767 MatInvertBlockDiagonal_MPIAIJ, 2768 MatInvertVariableBlockDiagonal_MPIAIJ, 2769 MatCreateSubMatricesMPI_MPIAIJ, 2770 /*129*/0, 2771 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2772 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2773 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2774 0, 2775 /*134*/0, 2776 0, 2777 MatRARt_MPIAIJ_MPIAIJ, 2778 0, 2779 0, 2780 
/*139*/MatSetBlockSizes_MPIAIJ, 2781 0, 2782 0, 2783 MatFDColoringSetUp_MPIXAIJ, 2784 MatFindOffBlockDiagonalEntries_MPIAIJ, 2785 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2786 }; 2787 2788 /* ----------------------------------------------------------------------------------------*/ 2789 2790 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2791 { 2792 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2793 PetscErrorCode ierr; 2794 2795 PetscFunctionBegin; 2796 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2797 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2798 PetscFunctionReturn(0); 2799 } 2800 2801 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2802 { 2803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2804 PetscErrorCode ierr; 2805 2806 PetscFunctionBegin; 2807 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2808 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2809 PetscFunctionReturn(0); 2810 } 2811 2812 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2813 { 2814 Mat_MPIAIJ *b; 2815 PetscErrorCode ierr; 2816 PetscMPIInt size; 2817 2818 PetscFunctionBegin; 2819 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2820 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2821 b = (Mat_MPIAIJ*)B->data; 2822 2823 #if defined(PETSC_USE_CTABLE) 2824 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2825 #else 2826 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2827 #endif 2828 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2829 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2830 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2831 2832 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2833 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2834 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2835 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2836 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2837 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2838 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2839 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2840 2841 if (!B->preallocated) { 2842 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2843 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2844 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2845 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2846 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2847 } 2848 2849 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2850 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2851 B->preallocated = PETSC_TRUE; 2852 B->was_assembled = PETSC_FALSE; 2853 B->assembled = PETSC_FALSE; 2854 PetscFunctionReturn(0); 2855 } 2856 2857 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2858 { 2859 Mat_MPIAIJ *b; 2860 PetscErrorCode ierr; 2861 2862 PetscFunctionBegin; 2863 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2864 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2865 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2866 b = (Mat_MPIAIJ*)B->data; 2867 2868 #if defined(PETSC_USE_CTABLE) 2869 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2870 #else 2871 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2872 #endif 2873 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2874 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2875 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2876 2877 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2878 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2879 B->preallocated = PETSC_TRUE; 2880 B->was_assembled = PETSC_FALSE; 2881 B->assembled = PETSC_FALSE; 2882 PetscFunctionReturn(0); 2883 } 2884 2885 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2886 { 2887 Mat mat; 2888 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2889 PetscErrorCode ierr; 2890 2891 PetscFunctionBegin; 2892 *newmat = 0; 2893 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2894 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2895 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2896 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2897 a = (Mat_MPIAIJ*)mat->data; 2898 2899 mat->factortype = matin->factortype; 2900 mat->assembled = PETSC_TRUE; 2901 mat->insertmode = NOT_SET_VALUES; 2902 mat->preallocated = PETSC_TRUE; 2903 2904 a->size = oldmat->size; 2905 a->rank = oldmat->rank; 2906 a->donotstash = oldmat->donotstash; 2907 a->roworiented = oldmat->roworiented; 2908 a->rowindices = 0; 2909 a->rowvalues = 0; 2910 a->getrowactive = PETSC_FALSE; 2911 2912 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2913 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2914 2915 if (oldmat->colmap) { 2916 #if defined(PETSC_USE_CTABLE) 2917 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2918 #else 2919 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2920 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2921 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2922 #endif 2923 } else a->colmap = 0; 2924 if (oldmat->garray) { 2925 PetscInt len; 2926 len = oldmat->B->cmap->n; 2927 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2928 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2929 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2930 } else a->garray = 0; 2931 2932 /* It may happen MatDuplicate is called with a non-assembled matrix 2933 In fact, MatDuplicate only requires the matrix to be preallocated 2934 This may happen inside a DMCreateMatrix_Shell */ 2935 if (oldmat->lvec) { 2936 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2937 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2938 } 2939 if (oldmat->Mvctx) { 2940 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2941 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2942 } 2943 if (oldmat->Mvctx_mpi1) { 2944 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2945 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2946 } 2947 2948 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2949 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2950 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2951 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2952 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2953 *newmat = mat; 2954 PetscFunctionReturn(0); 2955 } 2956 2957 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2958 { 2959 PetscBool isbinary, ishdf5; 2960 PetscErrorCode ierr; 2961 2962 PetscFunctionBegin; 2963 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2964 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2965 /* force binary viewer to load .info file if it has not yet done so */ 2966 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2967 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2968 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2969 if (isbinary) { 2970 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2971 } else if (ishdf5) { 2972 #if defined(PETSC_HAVE_HDF5) 2973 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2974 #else 2975 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2976 #endif 2977 } else { 2978 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2979 } 2980 PetscFunctionReturn(0); 2981 } 2982 2983 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2984 { 2985 PetscScalar *vals,*svals; 2986 MPI_Comm comm; 2987 PetscErrorCode ierr; 2988 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2989 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2990 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2991 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2992 PetscInt cend,cstart,n,*rowners; 2993 int fd; 2994 PetscInt bs = newMat->rmap->bs; 2995 2996 PetscFunctionBegin; 2997 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2998 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2999 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3000 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3001 if (!rank) { 3002 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3003 if (header[0] 
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with those given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = -1; /* unused, but compilers complain */

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns.
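   (Each process ends up with a sequential IS holding the global size of iscol.)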
*/
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns */
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt start,len,mstart,mend;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mend);CHKERRQ(ierr);
    if (mstart == start && mend-mstart == len) lisstride = 1;
  }

  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and the global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of the local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of the local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameters:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates the global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with -1 to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  ierr     = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  isstart -= ncols;
  ierr     = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  ierr   = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr   = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr   = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and garray */
  /* off-process column indices */
  count = 0;
  ierr  = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr  = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has the same blocksize as iscol! */

  ierr    = PetscFree(idx);CHKERRQ(ierr);
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* isrow and iscol have the same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
    if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
    if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
    if (n) {
      ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX */
    const PetscInt *garray;
    PetscInt       BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);

    /* Create local submatrices Asub and Bsub */
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);

    /* Create submatrix M */
    ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
    n    = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);

      ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
      j    = 0;
      ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
      }
      ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);

      ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);

    } else if (BsubN < n) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be fewer than columns of B (%D)",BsubN,asub->B->cmap->N);
    }

    ierr = PetscFree(garray);CHKERRQ(ierr);
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has the same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with the global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has the same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has the same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
    ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have the same processor distribution as mat */
      ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has the same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
        ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
        ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
        if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);

        ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may have duplicate indices */
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
        if (iscol_sub) {
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has the global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
    }
  }

  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
A - "diagonal" portion of matrix 3544 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3545 - garray - global index of B columns 3546 3547 Output Parameter: 3548 . mat - the matrix, with input A as its local diagonal matrix 3549 Level: advanced 3550 3551 Notes: 3552 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3553 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3554 3555 .seealso: MatCreateMPIAIJWithSplitArrays() 3556 @*/ 3557 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3558 { 3559 PetscErrorCode ierr; 3560 Mat_MPIAIJ *maij; 3561 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3562 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3563 PetscScalar *oa=b->a; 3564 Mat Bnew; 3565 PetscInt m,n,N; 3566 3567 PetscFunctionBegin; 3568 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3569 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3570 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3571 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3572 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3573 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3574 3575 /* Get global columns of mat */ 3576 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3577 3578 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3579 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3580 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3581 maij = (Mat_MPIAIJ*)(*mat)->data; 3582 3583 (*mat)->preallocated = PETSC_TRUE; 3584 3585 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3586 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3587 3588 /* Set A as diagonal portion of *mat */ 3589 maij->A = A; 3590 3591 nz = oi[m]; 3592 for (i=0; i<nz; i++) { 3593 col = oj[i]; 3594 oj[i] = garray[col]; 3595 } 3596 3597 /* Set Bnew as off-diagonal portion of *mat */ 3598 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3599 bnew = (Mat_SeqAIJ*)Bnew->data; 3600 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3601 maij->B = Bnew; 3602 3603 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3604 3605 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3606 b->free_a = PETSC_FALSE; 3607 b->free_ij = PETSC_FALSE; 3608 ierr = MatDestroy(&B);CHKERRQ(ierr); 3609 3610 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3611 bnew->free_a = PETSC_TRUE; 3612 bnew->free_ij = PETSC_TRUE; 3613 3614 /* condense columns of maij->B */ 3615 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3616 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3617 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3618 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3619 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3620 PetscFunctionReturn(0); 3621 } 3622 3623 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3624 
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap.
         The implementation below requires iscol_local to be sorted; it may have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr  = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr  = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr  = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
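    /* with sizes set, fix the block sizes and type, then preallocate with the exact dlens/olens counts computed above */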
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each process,
  and then the end result by concatenating the local matrices. Writing it directly would
  be much like MatCreateSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
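    For example, a caller mirroring what MatCreateSubMatrix_MPIAIJ() does in the general case
    would do something like (a sketch; iscol_seq is a hypothetical name):

       ISGetLocalSize(iscol,&csize);
       ISAllGather(iscol,&iscol_seq);
       MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_seq,csize,MAT_INITIAL_MATRIX,&newmat);
       ISDestroy(&iscol_seq);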
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i],v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
  }
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0-based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the expected input data is as shown:

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.
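   A typical call sequence (a sketch; m,n,M,N and the d_nnz/o_nnz arrays are assumed to be
   set up by the caller) is
.vb
      MatCreate(comm,&B);
      MatSetSizes(B,m,n,M,N);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve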

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0-based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering,
     i.e., for the following matrix, the expected input data is as shown below.

       Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format. Only the numerical values are updated; the other arrays must be identical.

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.  J - column indices
-  v - matrix values

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscErrorCode ierr;
  PetscInt       cstart,nnz,i,j;
  PetscInt       *ld;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data;
  PetscScalar    *ad = Ad->a, *ao = Ao->a;
  const PetscInt *Adi = Ad->i;
  PetscInt       ldi,Iii,md;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  cstart = mat->cmap->rstart;
  if (!Aij->ld) {
    /* count number of entries below block diagonal */
    ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
    Aij->ld = ld;
    for (i=0; i<m; i++) {
      nnz = Ii[i+1] - Ii[i];
      j   = 0;
      /* test j < nnz before dereferencing J[j] so we never read past the end of J */
      while (j < nnz && J[j] < cstart) j++;
      J    += nnz;
      ld[i] = j;
    }
  } else {
    ld = Aij->ld;
  }

  for (i=0; i<m; i++) {
    nnz  = Ii[i+1] - Ii[i];
    Iii  = Ii[i];
    ldi  = ld[i];
    md   = Adi[i+1]-Adi[i];
    ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
    ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
    ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
    ad  += md;
    ao  += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 4379 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 4380 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4381 (same value is used for all local rows) 4382 . d_nnz - array containing the number of nonzeros in the various rows of the 4383 DIAGONAL portion of the local submatrix (possibly different for each row) 4384 or NULL, if d_nz is used to specify the nonzero structure. 4385 The size of this array is equal to the number of local rows, i.e. 'm'. 4386 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4387 submatrix (same value is used for all local rows). 4388 - o_nnz - array containing the number of nonzeros in the various rows of the 4389 OFF-DIAGONAL portion of the local submatrix (possibly different for 4390 each row) or NULL, if o_nz is used to specify the nonzero 4391 structure. The size of this array is equal to the number 4392 of local rows, i.e. 'm'. 4393 4394 Output Parameter: 4395 . A - the matrix 4396 4397 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4398 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4399 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4400 4401 Notes: 4402 If the *_nnz parameter is given then the *_nz parameter is ignored 4403 4404 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4405 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4406 storage requirements for this matrix. 4407 4408 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4409 processor then it must be used on all processors that share the object for 4410 that argument. 4411 4412 The user MUST specify either the local or global matrix dimensions 4413 (possibly both). 4414 4415 The parallel matrix is partitioned across processors such that the 4416 first m0 rows belong to process 0, the next m1 rows belong to 4417 process 1, the next m2 rows belong to process 2, etc., where 4418 m0,m1,m2,... are given by the input parameter 'm', i.e. each processor stores 4419 values corresponding to an [m x N] submatrix. 4420 4421 The columns are logically partitioned with the n0 columns belonging 4422 to the 0th partition, the next n1 columns belonging to the next 4423 partition, etc., where n0,n1,n2,... are given by the input parameter 'n'. 4424 4425 The DIAGONAL portion of the local submatrix on any given processor 4426 is the submatrix corresponding to the rows and columns owned by 4427 that processor, i.e. the diagonal submatrix on 4428 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 4429 etc. The remaining portion of the local submatrix [m x (N-n)] 4430 constitutes the OFF-DIAGONAL portion. The example below better 4431 illustrates this concept. 4432 4433 For a square global matrix we define each processor's diagonal portion 4434 to be its local rows and the corresponding columns (a square submatrix); 4435 each processor's off-diagonal portion encompasses the remainder of the 4436 local matrix (a rectangular submatrix). 4437 4438 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 4439 4440 When calling this routine with a single process communicator, a matrix of 4441 type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4442 type of communicator, use the construction mechanism 4443 .vb 4444 MatCreate(...,&A); 4445 MatSetType(A,MATMPIAIJ); 4446 MatSetSizes(A, m,n,M,N); 4447 MatMPIAIJSetPreallocation(A,...); 4448 .ve 4449 4450 4451 4452 By default, this format uses inodes (identical nodes) when possible. 4453 We search for consecutive rows with the same nonzero structure, thereby 4454 reusing matrix information to achieve increased efficiency. 4455 4456 Options Database Keys: 4457 + -mat_no_inode - Do not use inodes 4458 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4459 4460 4461 4462 Example usage: 4463 4464 Consider the following 8x8 matrix with 34 non-zero values, that is 4465 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4466 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4467 as follows 4468 4469 .vb 4470 1 2 0 | 0 3 0 | 0 4 4471 Proc0 0 5 6 | 7 0 0 | 8 0 4472 9 0 10 | 11 0 0 | 12 0 4473 ------------------------------------- 4474 13 0 14 | 15 16 17 | 0 0 4475 Proc1 0 18 0 | 19 20 21 | 0 0 4476 0 0 0 | 22 23 0 | 24 0 4477 ------------------------------------- 4478 Proc2 25 26 27 | 0 0 28 | 29 0 4479 30 0 0 | 31 32 33 | 0 34 4480 .ve 4481 4482 This can be represented as a collection of submatrices as 4483 4484 .vb 4485 A B C 4486 D E F 4487 G H I 4488 .ve 4489 4490 Where the submatrices A,B,C are owned by proc0, D,E,F are 4491 owned by proc1, G,H,I are owned by proc2. 4492 4493 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4494 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4495 The 'M','N' parameters are 8,8, and have the same values on all procs. 4496 4497 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4498 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4499 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4500 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4501 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4502 matrix, and [DF] as another SeqAIJ matrix. 4503 4504 When d_nz, o_nz parameters are specified, d_nz storage elements are 4505 allocated for every row of the local diagonal submatrix, and o_nz 4506 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4507 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4508 local row in the local DIAGONAL and OFF-DIAGONAL submatrices respectively. 4509 In this case, the values of d_nz,o_nz are 4510 .vb 4511 proc0 : d_nz = 2, o_nz = 2 4512 proc1 : d_nz = 3, o_nz = 2 4513 proc2 : d_nz = 1, o_nz = 4 4514 .ve 4515 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4516 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4517 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4518 34 values. 4519 4520 When d_nnz, o_nnz parameters are specified, the storage is specified 4521 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4522 In the above case the values for d_nnz,o_nnz are 4523 .vb 4524 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4525 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4526 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4527 .ve 4528 Here the space allocated is the sum of all the above values, i.e. 34, and 4529 hence the pre-allocation is perfect.
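   A minimal calling sketch for proc0 of the example above (illustrative only; error checking is
   omitted and the d_nnz/o_nnz values are taken from the table above):
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};          /* proc0 rows of the example */
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A); /* d_nz/o_nz are ignored since d_nnz/o_nnz are given */
     /* fill the three local rows with MatSetValues(), then assemble */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve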
4530 4531 Level: intermediate 4532 4533 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4534 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4535 @*/ 4536 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4537 { 4538 PetscErrorCode ierr; 4539 PetscMPIInt size; 4540 4541 PetscFunctionBegin; 4542 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4543 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4544 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4545 if (size > 1) { 4546 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4547 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4548 } else { 4549 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4550 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4551 } 4552 PetscFunctionReturn(0); 4553 } 4554 4555 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4556 { 4557 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4558 PetscBool flg; 4559 PetscErrorCode ierr; 4560 4561 PetscFunctionBegin; 4562 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4563 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4564 if (Ad) *Ad = a->A; 4565 if (Ao) *Ao = a->B; 4566 if (colmap) *colmap = a->garray; 4567 PetscFunctionReturn(0); 4568 } 4569 4570 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4571 { 4572 PetscErrorCode ierr; 4573 PetscInt m,N,i,rstart,nnz,Ii; 4574 PetscInt *indx; 4575 PetscScalar *values; 4576 4577 PetscFunctionBegin; 4578 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4579 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4580 PetscInt *dnz,*onz,sum,bs,cbs; 4581 4582 if (n == PETSC_DECIDE) { 4583 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4584 } 4585 /* Check sum(n) = N */ 4586 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4587 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4588 4589 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4590 rstart -= m; 4591 4592 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4593 for (i=0; i<m; i++) { 4594 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4595 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4596 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4597 } 4598 4599 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4600 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4601 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4602 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4603 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4604 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4605 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4606 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4607 } 4608 4609 /* numeric phase */ 4610 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4611 for (i=0; i<m; i++) { 4612 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4613 Ii = i + rstart; 4614 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4615 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4616 } 4617 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4618 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4619 PetscFunctionReturn(0); 4620 } 4621 4622 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4623 { 4624 PetscErrorCode ierr; 4625 PetscMPIInt rank; 4626 PetscInt m,N,i,rstart,nnz; 4627 size_t len; 4628 const PetscInt *indx; 4629 PetscViewer out; 4630 char *name; 4631 Mat B; 4632 const PetscScalar *values; 4633 4634 PetscFunctionBegin; 4635 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4636 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4637 /* Should this be the type of the diagonal block of A? */ 4638 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4639 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4640 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4641 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4642 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4643 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4644 for (i=0; i<m; i++) { 4645 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4646 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4647 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4648 } 4649 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4650 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4651 4652 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4653 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4654 ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr); /* room for '.', the rank digits, and the terminating NUL */ 4655 ierr = PetscSNPrintf(name,len+16,"%s.%d",outfile,rank);CHKERRQ(ierr); /* bounded print; the previous raw sprintf() with len+5 could overflow for ranks above 999 */ 4656 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4657 ierr = PetscFree(name);CHKERRQ(ierr); 4658 ierr = MatView(B,out);CHKERRQ(ierr); 4659 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4660 ierr = MatDestroy(&B);CHKERRQ(ierr); 4661 PetscFunctionReturn(0); 4662 } 4663 4664 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4665 { 4666 PetscErrorCode ierr; 4667 Mat_Merge_SeqsToMPI *merge; 4668 PetscContainer container; 4669 4670 PetscFunctionBegin; 4671 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4672 if (container) { 4673 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4674 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4675 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4676 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4677 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4678 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4679 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4680 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4681 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4682 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4683 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4684 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4685 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4686 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4687 ierr = PetscFree(merge);CHKERRQ(ierr); 4688 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4689 } 4690 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4691 PetscFunctionReturn(0); 4692 } 4693 4694 #include <../src/mat/utils/freespace.h> 4695 #include <petscbt.h> 4696 4697 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4698 { 4699 PetscErrorCode ierr; 4700 MPI_Comm comm; 4701 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4702 PetscMPIInt
size,rank,taga,*len_s; 4703 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4704 PetscInt proc,m; 4705 PetscInt **buf_ri,**buf_rj; 4706 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4707 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4708 MPI_Request *s_waits,*r_waits; 4709 MPI_Status *status; 4710 MatScalar *aa=a->a; 4711 MatScalar **abuf_r,*ba_i; 4712 Mat_Merge_SeqsToMPI *merge; 4713 PetscContainer container; 4714 4715 PetscFunctionBegin; 4716 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4717 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4718 4719 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4720 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4721 4722 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4723 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4724 4725 bi = merge->bi; 4726 bj = merge->bj; 4727 buf_ri = merge->buf_ri; 4728 buf_rj = merge->buf_rj; 4729 4730 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4731 owners = merge->rowmap->range; 4732 len_s = merge->len_s; 4733 4734 /* send and recv matrix values */ 4735 /*-----------------------------*/ 4736 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4737 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4738 4739 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4740 for (proc=0,k=0; proc<size; proc++) { 4741 if (!len_s[proc]) continue; 4742 i = owners[proc]; 4743 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4744 k++; 4745 } 4746 4747 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4748 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4749 ierr = PetscFree(status);CHKERRQ(ierr); 4750 4751 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4752 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4753 4754 /* insert mat values of mpimat */ 4755 /*----------------------------*/ 4756 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4757 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4758 4759 for (k=0; k<merge->nrecv; k++) { 4760 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4761 nrows = *(buf_ri_k[k]); 4762 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4763 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4764 } 4765 4766 /* set values of ba */ 4767 m = merge->rowmap->n; 4768 for (i=0; i<m; i++) { 4769 arow = owners[rank] + i; 4770 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4771 bnzi = bi[i+1] - bi[i]; 4772 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4773 4774 /* add local non-zero vals of this proc's seqmat into ba */ 4775 anzi = ai[arow+1] - ai[arow]; 4776 aj = a->j + ai[arow]; 4777 aa = a->a + ai[arow]; 4778 nextaj = 0; 4779 for (j=0; nextaj<anzi; j++) { 4780 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4781 ba_i[j] += aa[nextaj++]; 4782 } 4783 } 4784 4785 /* add received vals into ba */ 4786 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4787 /* i-th row */ 4788 if (i == *nextrow[k]) { 4789 anzi = *(nextai[k]+1) - *nextai[k]; 4790 aj = buf_rj[k] + *(nextai[k]); 4791 aa = abuf_r[k] + *(nextai[k]); 4792 nextaj = 0; 4793 for (j=0; nextaj<anzi; j++) { 4794 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4795
ba_i[j] += aa[nextaj++]; 4796 } 4797 } 4798 nextrow[k]++; nextai[k]++; 4799 } 4800 } 4801 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4802 } 4803 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4804 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4805 4806 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4807 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4808 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4809 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4810 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4811 PetscFunctionReturn(0); 4812 } 4813 4814 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4815 { 4816 PetscErrorCode ierr; 4817 Mat B_mpi; 4818 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4819 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4820 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4821 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4822 PetscInt len,proc,*dnz,*onz,bs,cbs; 4823 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4824 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4825 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4826 MPI_Status *status; 4827 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4828 PetscBT lnkbt; 4829 Mat_Merge_SeqsToMPI *merge; 4830 PetscContainer container; 4831 4832 PetscFunctionBegin; 4833 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4834 4835 /* make sure it is a PETSc comm */ 4836 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4837 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4838 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4839 4840 ierr = PetscNew(&merge);CHKERRQ(ierr); 4841 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4842 4843 /* determine row ownership */ 4844 /*---------------------------------------------------------*/ 4845 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4846 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4847 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4848 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4849 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4850 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4851 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4852 4853 m = merge->rowmap->n; 4854 owners = merge->rowmap->range; 4855 4856 /* determine the number of messages to send, their lengths */ 4857 /*---------------------------------------------------------*/ 4858 len_s = merge->len_s; 4859 4860 len = 0; /* length of buf_si[] */ 4861 merge->nsend = 0; 4862 for (proc=0; proc<size; proc++) { 4863 len_si[proc] = 0; 4864 if (proc == rank) { 4865 len_s[proc] = 0; 4866 } else { 4867 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4868 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */ 4869 } 4870 if (len_s[proc]) { 4871 merge->nsend++; 4872 nrows = 0; 4873 for (i=owners[proc]; i<owners[proc+1]; i++) { 4874 if (ai[i+1] > ai[i]) nrows++; 4875 } 4876 len_si[proc] = 2*(nrows+1); 4877 len += len_si[proc]; 4878 } 4879 } 4880 4881 /* determine the number and length of messages to receive for ij-structure */ 4882 /*-------------------------------------------------------------------------*/ 4883 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4884 ierr =
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4885 4886 /* post the Irecv of j-structure */ 4887 /*-------------------------------*/ 4888 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4889 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4890 4891 /* post the Isend of j-structure */ 4892 /*--------------------------------*/ 4893 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4894 4895 for (proc=0, k=0; proc<size; proc++) { 4896 if (!len_s[proc]) continue; 4897 i = owners[proc]; 4898 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4899 k++; 4900 } 4901 4902 /* receives and sends of j-structure are complete */ 4903 /*------------------------------------------------*/ 4904 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4905 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4906 4907 /* send and recv i-structure */ 4908 /*---------------------------*/ 4909 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4910 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4911 4912 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4913 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4914 for (proc=0,k=0; proc<size; proc++) { 4915 if (!len_s[proc]) continue; 4916 /* form outgoing message for i-structure: 4917 buf_si[0]: nrows to be sent 4918 [1:nrows]: row index (local to the receiving process) 4919 [nrows+1:2*nrows+1]: i-structure index 4920 */ 4921 /*-------------------------------------------*/ 4922 nrows = len_si[proc]/2 - 1; 4923 buf_si_i = buf_si + nrows+1; 4924 buf_si[0] = nrows; 4925 buf_si_i[0] = 0; 4926 nrows = 0; 4927 for (i=owners[proc]; i<owners[proc+1]; i++) { 4928 anzi = ai[i+1] - ai[i]; 4929 if (anzi) { 4930 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4931 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4932 nrows++; 4933 } 4934 } 4935 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4936 k++; 4937 buf_si += len_si[proc]; 4938 } 4939 4940 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4941 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4942 4943 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4944 for (i=0; i<merge->nrecv; i++) { 4945 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4946 } 4947 4948 ierr = PetscFree(len_si);CHKERRQ(ierr); 4949 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4950 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4951 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4952 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4953 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4954 ierr = PetscFree(status);CHKERRQ(ierr); 4955 4956 /* compute a local seq matrix in each processor */ 4957 /*----------------------------------------------*/ 4958 /* allocate bi array and free space for accumulating nonzero column info */ 4959 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4960 bi[0] = 0; 4961 4962 /* create and initialize a linked list */ 4963 nlnk = N+1; 4964 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4965 4966 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4967 len = ai[owners[rank+1]] -
ai[owners[rank]]; 4968 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4969 4970 current_space = free_space; 4971 4972 /* determine symbolic info for each local row */ 4973 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4974 4975 for (k=0; k<merge->nrecv; k++) { 4976 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4977 nrows = *buf_ri_k[k]; 4978 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4979 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4980 } 4981 4982 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4983 len = 0; 4984 for (i=0; i<m; i++) { 4985 bnzi = 0; 4986 /* add local non-zero cols of this proc's seqmat into lnk */ 4987 arow = owners[rank] + i; 4988 anzi = ai[arow+1] - ai[arow]; 4989 aj = a->j + ai[arow]; 4990 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4991 bnzi += nlnk; 4992 /* add received col data into lnk */ 4993 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4994 if (i == *nextrow[k]) { /* i-th row */ 4995 anzi = *(nextai[k]+1) - *nextai[k]; 4996 aj = buf_rj[k] + *nextai[k]; 4997 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4998 bnzi += nlnk; 4999 nextrow[k]++; nextai[k]++; 5000 } 5001 } 5002 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5003 5004 /* if free space is not available, make more free space */ 5005 if (current_space->local_remaining<bnzi) { 5006 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 5007 nspacedouble++; 5008 } 5009 /* copy data into free space, then initialize lnk */ 5010 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5011 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5012 5013 current_space->array += bnzi; 5014 current_space->local_used += bnzi; 5015 current_space->local_remaining -= bnzi; 5016 5017 bi[i+1] = bi[i] + bnzi; 5018 } 5019 5020 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5021 5022 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 5023 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5024 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5025 5026 /* create symbolic parallel matrix B_mpi */ 5027 /*---------------------------------------*/ 5028 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5029 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5030 if (n==PETSC_DECIDE) { 5031 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5032 } else { 5033 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5034 } 5035 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5036 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5037 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5038 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5039 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5040 5041 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5042 B_mpi->assembled = PETSC_FALSE; 5043 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5044 merge->bi = bi; 5045 merge->bj = bj; 5046 merge->buf_ri = buf_ri; 5047 merge->buf_rj = buf_rj; 5048 merge->coi = NULL; 5049 merge->coj = NULL; 5050 merge->owners_co = NULL; 5051 5052 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5053 5054 /* attach the
supporting struct to B_mpi for reuse */ 5055 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5056 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5057 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5058 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5059 *mpimat = B_mpi; 5060 5061 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5062 PetscFunctionReturn(0); 5063 } 5064 5065 /*@C 5066 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5067 matrices from each processor 5068 5069 Collective 5070 5071 Input Parameters: 5072 + comm - the communicator the parallel matrix will live on 5073 . seqmat - the input sequential matrix 5074 . m - number of local rows (or PETSC_DECIDE) 5075 . n - number of local columns (or PETSC_DECIDE) 5076 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5077 5078 Output Parameter: 5079 . mpimat - the parallel matrix generated 5080 5081 Level: advanced 5082 5083 Notes: 5084 The dimensions of the sequential matrix in each processor MUST be the same. 5085 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 5086 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5087 @*/ 5088 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5089 { 5090 PetscErrorCode ierr; 5091 PetscMPIInt size; 5092 5093 PetscFunctionBegin; 5094 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5095 if (size == 1) { 5096 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5097 if (scall == MAT_INITIAL_MATRIX) { 5098 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5099 } else { 5100 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5101 } 5102 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5103 PetscFunctionReturn(0); 5104 } 5105 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5106 if (scall == MAT_INITIAL_MATRIX) { 5107 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5108 } 5109 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5110 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5111 PetscFunctionReturn(0); 5112 } 5113 5114 /*@ 5115 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5116 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5117 with MatGetSize() 5118 5119 Not Collective 5120 5121 Input Parameters: 5122 + A - the matrix 5123 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5124 5125 Output Parameter: 5126 .
A_loc - the local sequential matrix generated 5127 5128 Level: developer 5129 5130 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5131 5132 @*/ 5133 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5134 { 5135 PetscErrorCode ierr; 5136 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5137 Mat_SeqAIJ *mat,*a,*b; 5138 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5139 MatScalar *aa,*ba,*cam; 5140 PetscScalar *ca; 5141 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5142 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5143 PetscBool match; 5144 MPI_Comm comm; 5145 PetscMPIInt size; 5146 5147 PetscFunctionBegin; 5148 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5149 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5150 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5151 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5152 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5153 5154 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5155 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5156 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5157 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5158 aa = a->a; ba = b->a; 5159 if (scall == MAT_INITIAL_MATRIX) { 5160 if (size == 1) { 5161 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5162 PetscFunctionReturn(0); 5163 } 5164 5165 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5166 ci[0] = 0; 5167 for (i=0; i<am; i++) { 5168 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5169 } 5170 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5171 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5172 k = 0; 5173 for (i=0; i<am; i++) { 5174 ncols_o = bi[i+1] - bi[i]; 5175 ncols_d = ai[i+1] - ai[i]; 5176 /* off-diagonal portion of A */ 5177 for (jo=0; jo<ncols_o; jo++) { 5178 col = cmap[*bj]; 5179 if (col >= cstart) break; 5180 cj[k] = col; bj++; 5181 ca[k++] = *ba++; 5182 } 5183 /* diagonal portion of A */ 5184 for (j=0; j<ncols_d; j++) { 5185 cj[k] = cstart + *aj++; 5186 ca[k++] = *aa++; 5187 } 5188 /* off-diagonal portion of A */ 5189 for (j=jo; j<ncols_o; j++) { 5190 cj[k] = cmap[*bj++]; 5191 ca[k++] = *ba++; 5192 } 5193 } 5194 /* put together the new matrix */ 5195 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5196 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5197 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5198 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5199 mat->free_a = PETSC_TRUE; 5200 mat->free_ij = PETSC_TRUE; 5201 mat->nonew = 0; 5202 } else if (scall == MAT_REUSE_MATRIX) { 5203 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5204 ci = mat->i; cj = mat->j; cam = mat->a; 5205 for (i=0; i<am; i++) { 5206 /* off-diagonal portion of A */ 5207 ncols_o = bi[i+1] - bi[i]; 5208 for (jo=0; jo<ncols_o; jo++) { 5209 col = cmap[*bj]; 5210 if (col >= cstart) break; 5211 *cam++ = *ba++; bj++; 5212 } 5213 /* diagonal portion of A */ 5214 ncols_d = ai[i+1] - ai[i]; 5215 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5216 /* off-diagonal portion of A */ 5217 for (j=jo; j<ncols_o; j++) { 5218 *cam++ = *ba++; bj++; 5219 } 5220 } 5221 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5222 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5223 PetscFunctionReturn(0); 5224 } 5225 5226 /*@C 5227 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5228 5229 Not Collective 5230 5231 Input Parameters: 5232 + A - the matrix 5233 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5234 - row, col - index sets of rows and columns to extract (or NULL) 5235 5236 Output Parameter: 5237 . A_loc - the local sequential matrix generated 5238 5239 Level: developer 5240 5241 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5242 5243 @*/ 5244 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5245 { 5246 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5247 PetscErrorCode ierr; 5248 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5249 IS isrowa,iscola; 5250 Mat *aloc; 5251 PetscBool match; 5252 5253 PetscFunctionBegin; 5254 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5255 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5256 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5257 if (!row) { 5258 start = A->rmap->rstart; end = A->rmap->rend; 5259 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5260 } else { 5261 isrowa = *row; 5262 } 5263 if (!col) { 5264 start = A->cmap->rstart; 5265 cmap = a->garray; 5266 nzA = a->A->cmap->n; 5267 nzB = a->B->cmap->n; 5268 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5269 ncols = 0; 5270 for (i=0; i<nzB; i++) { 5271 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5272 else break; 5273 } 5274 imark = i; 5275 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5276 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5277 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5278 } else { 5279 iscola = *col; 5280 } 5281 if (scall != MAT_INITIAL_MATRIX) { 5282 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5283 aloc[0] = *A_loc; 5284 } 5285 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5286 if (!col) { /* attach global id of condensed columns */ 5287 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5288 } 5289 *A_loc = aloc[0]; 5290 ierr = PetscFree(aloc);CHKERRQ(ierr); 5291 if (!row) { 5292 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5293 } 5294 if (!col) { 5295 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5296 } 5297 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5298 PetscFunctionReturn(0); 5299 } 5300 5301 /* 5302 * Destroy a mat that may be 
composed with PetscSF communication objects. 5303 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5304 * */ 5305 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5306 { 5307 PetscSF sf,osf; 5308 IS map; 5309 PetscErrorCode ierr; 5310 5311 PetscFunctionBegin; 5312 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5313 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5314 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5315 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5316 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5317 ierr = ISDestroy(&map);CHKERRQ(ierr); 5318 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5319 PetscFunctionReturn(0); 5320 } 5321 5322 /* 5323 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once its index is matched. 5324 * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based 5325 * on a global size. 5326 * */ 5327 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5328 { 5329 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5330 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5331 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols,ncol; 5332 PetscSFNode *iremote,*oiremote; 5333 const PetscInt *lrowindices; 5334 PetscErrorCode ierr; 5335 PetscSF sf,osf; 5336 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5337 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5338 MPI_Comm comm; 5339 ISLocalToGlobalMapping mapping; 5340 5341 PetscFunctionBegin; 5342 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5343 /* plocalsize is the number of roots 5344 * nrows is the number of leaves 5345 * */ 5346 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5347 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5348 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5349 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5350 for (i=0;i<nrows;i++) { 5351 /* Find a remote index and an owner for a row 5352 * The row could be local or remote 5353 * */ 5354 owner = 0; 5355 lidx = 0; 5356 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5357 iremote[i].index = lidx; 5358 iremote[i].rank = owner; 5359 } 5360 /* Create SF to communicate how many nonzero columns for each row */ 5361 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5362 /* SF will figure out the number of nonzero columns for each row, and their 5363 * offsets 5364 * */ 5365 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5366 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5367 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5368 5369 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5370 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5371 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5372 roffsets[0] = 0; 5373 roffsets[1] = 0; 5374 for (i=0;i<plocalsize;i++) { 5375 /* diag */ 5376 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5377 /* off diag */ 5378 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5379 /* compute offsets so that we know the relative location of each row */ 5380 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5381 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5382 } 5383 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5384 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5385 /* 'r' means root, and
'l' means leaf */ 5386 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5387 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5388 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5389 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5390 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5391 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5392 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5393 dntotalcols = 0; 5394 ontotalcols = 0; 5395 ncol = 0; 5396 for (i=0;i<nrows;i++) { 5397 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5398 ncol = PetscMax(pnnz[i],ncol); 5399 /* diag */ 5400 dntotalcols += nlcols[i*2+0]; 5401 /* off diag */ 5402 ontotalcols += nlcols[i*2+1]; 5403 } 5404 /* We do not need to figure out the right number of columns 5405 * since all the calculations will be done by going through the raw data 5406 * */ 5407 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5408 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5409 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5410 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5411 /* diag */ 5412 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5413 /* off diag */ 5414 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5415 /* diag */ 5416 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5417 /* off diag */ 5418 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5419 dntotalcols = 0; 5420 ontotalcols = 0; 5421 ntotalcols = 0; 5422 for (i=0;i<nrows;i++) { 5423 owner = 0; 5424 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5425 /* Set iremote for diag matrix */ 5426 for (j=0;j<nlcols[i*2+0];j++) { 5427 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5428 iremote[dntotalcols].rank = owner; 5429 /* P_oth is seqAIJ so that ilocal needs to point to the first part of memory */ 5430 ilocal[dntotalcols++] = ntotalcols++; 5431 } 5432 /* off diag */ 5433 for (j=0;j<nlcols[i*2+1];j++) { 5434 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5435 oiremote[ontotalcols].rank = owner; 5436 oilocal[ontotalcols++] = ntotalcols++; 5437 } 5438 } 5439 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5440 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5441 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5442 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5443 /* P serves as roots and P_oth as leaves 5444 * Diag matrix 5445 * */ 5446 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5447 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5448 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5449 5450 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5451 /* Off diag */ 5452 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5453 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5454 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5455 /* We operate on the matrix internal data for saving memory */ 5456 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5457 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5458 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5459 /* Convert to global indices for diag matrix */ 5460 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5461 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5462 /* We want P_oth to store global indices */ 5463 ierr =
ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5464 /* Use memory scalable approach */ 5465 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5466 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5467 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5468 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5469 /* Convert back to local indices */ 5470 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5471 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5472 nout = 0; 5473 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5474 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D \n",po->i[plocalsize],nout); 5475 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5476 /* Exchange values */ 5477 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5478 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5479 /* Stop PETSc from shrinking memory */ 5480 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5481 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5482 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5483 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5484 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5485 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5486 /* The "new" MatDestroy takes care of the PetscSF objects as well */ 5487 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5488 PetscFunctionReturn(0); 5489 } 5490 5491 /* 5492 * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5493 * This supports MPIAIJ and MAIJ 5494 * */ 5495 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5496 { 5497 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5498 Mat_SeqAIJ *p_oth; 5499 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5500 IS rows,map; 5501 PetscHMapI hamp; 5502 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5503 MPI_Comm comm; 5504 PetscSF sf,osf; 5505 PetscBool has; 5506 PetscErrorCode ierr; 5507 5508 PetscFunctionBegin; 5509 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5510 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5511 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5512 * and then create a submatrix (that often is an overlapping matrix) 5513 * */ 5514 if (reuse==MAT_INITIAL_MATRIX) { 5515 /* Use a hash table to figure out unique keys */ 5516 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5517 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5518 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5519 count = 0; 5520 /* Assume that a->garray is sorted, otherwise the following does not make sense */ 5521 for (i=0;i<a->B->cmap->n;i++) { 5522 key = a->garray[i]/dof; 5523 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5524 if (!has) { 5525 mapping[i] = count; 5526 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5527 } else { 5528 /* Current 'i' has the same key as the previous one */ 5529 mapping[i] = count-1; 5530 } 5531 } 5532 ierr =
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5533 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5534 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count); 5535 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5536 off = 0; 5537 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5538 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5539 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5540 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5541 /* In case the matrix was already created but the user wants to recreate it */ 5542 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5543 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5544 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5545 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5546 } else if (reuse==MAT_REUSE_MATRIX) { 5547 /* If the matrix was already created, we simply update the values using the SF objects 5548 * that were attached to the matrix earlier. 5549 * */ 5550 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5551 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5552 if (!sf || !osf) { 5553 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5554 } 5555 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5556 /* Update values in place */ 5557 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5558 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5559 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5560 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5561 } else { 5562 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5563 } 5564 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5565 PetscFunctionReturn(0); 5566 } 5567 5568 /*@C 5569 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5570 5571 Collective on Mat 5572 5573 Input Parameters: 5574 + A,B - the matrices in mpiaij format 5575 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5576 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5577 5578 Output Parameters: 5579 + rowb, colb - index sets of rows and columns of B to extract 5580 - B_seq - the sequential matrix generated 5581 5582 Level: developer 5583 5584 @*/ 5585 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5586 { 5587 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5588 PetscErrorCode ierr; 5589 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5590 IS isrowb,iscolb; 5591 Mat *bseq=NULL; 5592 5593 PetscFunctionBegin; 5594 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5595 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5596 } 5597 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5598 5599 if (scall == MAT_INITIAL_MATRIX) { 5600 start = A->cmap->rstart; 5601 cmap = a->garray; 5602 nzA = a->A->cmap->n; 5603 nzB = a->B->cmap->n; 5604 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5605 ncols = 0; 5606 for (i=0; i<nzB; i++) { /* row < local row index */ 5607 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5608 else break; 5609 } 5610 imark = i; 5611 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5612 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5613 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5614 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5615 } else { 5616 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5617 isrowb = *rowb; iscolb = *colb; 5618 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5619 bseq[0] = *B_seq; 5620 } 5621 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5622 *B_seq = bseq[0]; 5623 ierr = PetscFree(bseq);CHKERRQ(ierr); 5624 if (!rowb) { 5625 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5626 } else { 5627 *rowb = isrowb; 5628 } 5629 if (!colb) { 5630 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5631 } else { 5632 *colb = iscolb; 5633 } 5634 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5635 PetscFunctionReturn(0); 5636 } 5637 5638 /* 5639 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 5640 of the OFF-DIAGONAL portion of local A 5641 5642 Collective on Mat 5643 5644 Input Parameters: 5645 + A,B - the matrices in mpiaij format 5646 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5647 5648 Output Parameters: 5649 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5650 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5651 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5652 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5653 5654 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5655 for this matrix. This is not desirable.
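   A reuse sketch (illustrative only; the variable names here are hypothetical):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... B receives new numerical values with an unchanged nonzero pattern ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);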
5656 5657 Level: developer 5658 5659 */ 5660 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5661 { 5662 PetscErrorCode ierr; 5663 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5664 Mat_SeqAIJ *b_oth; 5665 VecScatter ctx; 5666 MPI_Comm comm; 5667 const PetscMPIInt *rprocs,*sprocs; 5668 const PetscInt *srow,*rstarts,*sstarts; 5669 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5670 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5671 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5672 MPI_Request *rwaits = NULL,*swaits = NULL; 5673 MPI_Status rstatus; 5674 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5675 5676 PetscFunctionBegin; 5677 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5678 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5679 5680 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5681 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5682 } 5683 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5684 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5685 5686 if (size == 1) { 5687 startsj_s = NULL; 5688 bufa_ptr = NULL; 5689 *B_oth = NULL; 5690 PetscFunctionReturn(0); 5691 } 5692 5693 ctx = a->Mvctx; 5694 tag = ((PetscObject)ctx)->tag; 5695 5696 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5697 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5698 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5699 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5700 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5701 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5702 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5703 5704 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5705 if (scall == MAT_INITIAL_MATRIX) { 5706 /* i-array */ 5707 /*---------*/ 5708 /* post receives */ 5709 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5710 for (i=0; i<nrecvs; i++) { 5711 rowlen = rvalues + rstarts[i]*rbs; 5712 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5713 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5714 } 5715 5716 /* pack the outgoing message */ 5717 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5718 5719 sstartsj[0] = 0; 5720 rstartsj[0] = 0; 5721 len = 0; /* total length of j or a array to be sent */ 5722 if (nsends) { 5723 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5724 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5725 } 5726 for (i=0; i<nsends; i++) { 5727 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5728 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5729 for (j=0; j<nrows; j++) { 5730 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5731 for (l=0; l<sbs; l++) { 5732 ierr =
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5733 5734 rowlen[j*sbs+l] = ncols; 5735 5736 len += ncols; 5737 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5738 } 5739 k++; 5740 } 5741 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5742 5743 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5744 } 5745 /* recvs and sends of i-array are completed */ 5746 i = nrecvs; 5747 while (i--) { 5748 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5749 } 5750 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5751 ierr = PetscFree(svalues);CHKERRQ(ierr); 5752 5753 /* allocate buffers for sending j and a arrays */ 5754 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5755 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5756 5757 /* create i-array of B_oth */ 5758 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5759 5760 b_othi[0] = 0; 5761 len = 0; /* total length of j or a array to be received */ 5762 k = 0; 5763 for (i=0; i<nrecvs; i++) { 5764 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5765 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5766 for (j=0; j<nrows; j++) { 5767 b_othi[k+1] = b_othi[k] + rowlen[j]; 5768 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5769 k++; 5770 } 5771 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5772 } 5773 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5774 5775 /* allocate space for j and a arrays of B_oth */ 5776 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5777 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5778 5779 /* j-array */ 5780 /*---------*/ 5781 /* post receives of j-array */ 5782 for (i=0; i<nrecvs; i++) { 5783 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5784 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5785 } 5786 5787 /* pack the outgoing message j-array */ 5788 if (nsends) k = sstarts[0]; 5789 for (i=0; i<nsends; i++) { 5790 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5791 bufJ = bufj+sstartsj[i]; 5792 for (j=0; j<nrows; j++) { 5793 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5794 for (ll=0; ll<sbs; ll++) { 5795 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5796 for (l=0; l<ncols; l++) { 5797 *bufJ++ = cols[l]; 5798 } 5799 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5800 } 5801 } 5802 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5803 } 5804 5805 /* recvs and sends of j-array are completed */ 5806 i = nrecvs; 5807 while (i--) { 5808 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5809 } 5810 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5811 } else if (scall == MAT_REUSE_MATRIX) { 5812 sstartsj = *startsj_s; 5813 rstartsj = *startsj_r; 5814 bufa = *bufa_ptr; 5815 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5816 b_otha = b_oth->a; 5817 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 5818 5819 /* a-array */ 5820 /*---------*/ 5821 /* post receives of a-array */ 5822 for (i=0; i<nrecvs; i++) { 5823 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5824 ierr =
  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags the matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change the flags so they are freed as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the values buffer itself; the caller did not ask for it back */
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }

  ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
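
/*
   A minimal usage sketch for MatGetBrowsOfAoCols_MPIAIJ(), assuming A and B are assembled
   MATMPIAIJ matrices whose layouts satisfy the compatibility check above; the variable names
   are illustrative. The first call allocates the communication buffers; passing them back in
   with MAT_REUSE_MATRIX makes later updates after a change of numerical values cheaper:

     Mat       B_oth;
     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... use B_oth, then change the numerical values of B ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

   The caller eventually frees startsj_s, startsj_r, and bufa and destroys B_oth.
*/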

/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

    Input Parameter:
.   A - the matrix in MATMPIAIJ format

    Output Parameters:
+   lvec - the local vector holding off-process values from the argument to a matrix-vector product
.   colmap - a map from global column index to local index into lvec
-   multScatter - a scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);

/*
    Computes (B'*A')', since computing A*B directly is untenable

               n                       p                           p
        (              )       (              )         (              )
      m (      A       )  *  n (      B       )   =   m (      C       )
        (              )       (              )         (              )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
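
/*
   The routine above computes C = A*B for dense A and sparse B through the identity
   C = A*B = (B^T A^T)^T: only transposes and a supported AIJ-times-dense product are ever
   formed. A minimal sketch of reaching it through the public interface, assuming X is a
   MATMPIDENSE matrix and Y a MATMPIAIJ matrix (names illustrative):

     Mat Z;
     ierr = MatMatMult(X,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Z);CHKERRQ(ierr);   first call: symbolic, then numeric
     ierr = MatMatMult(X,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Z);CHKERRQ(ierr);     later calls: numeric only
*/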
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,n = B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
   MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix.

   MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
   A short sketch of both options follows this manual page.

.seealso: MatCreateAIJ()
M*/
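
/*
   A minimal sketch of the two Notes in the MATMPIAIJ manual page above, assuming A is a
   MATMPIAIJ matrix and rows/cols are valid index arrays (names illustrative):

     Insert a sparsity pattern whose numerical values are all zero:
       ierr = MatSetValues(A,nrows,rows,ncols,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);

     Keep only the nonzero structure; values passed to MatSetValues() are then ignored:
       ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
*/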
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off-process entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix-vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
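
/*
   The PetscObjectComposeFunction() calls above register type-specific implementations under
   string keys. A minimal sketch of the matching lookup side, as used by dispatchers such as
   MatConvert() (the conversion target chosen here is illustrative):

     PetscErrorCode (*conv)(Mat,MatType,MatReuse,Mat*);

     ierr = PetscObjectQueryFunction((PetscObject)A,"MatConvert_mpiaij_mpisell_C",&conv);CHKERRQ(ierr);
     if (conv) {ierr = (*conv)(A,MATMPISELL,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);}

   Composing by name keeps the core library from linking directly against every optional
   implementation that may or may not be configured in.
*/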

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - number of local columns. This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated if N is
       given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based.

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
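
/*
   A minimal sketch of MatCreateMPIAIJWithSplitArrays() for the 2x2 matrix [1 2; 3 4] on two
   processes, shown for rank 0, which owns row 0 (rank 1 is analogous with values 4 and 3).
   All array contents are illustrative; in particular, the use of global column indices in
   oj[] is an assumption that follows from the B->cmap->N width used above:

     PetscInt    i[]  = {0,1}, j[]  = {0};    diagonal block: value 1.0 in local column 0
     PetscInt    oi[] = {0,1}, oj[] = {1};    off-diagonal block: value 2.0 in global column 1
     PetscScalar a[]  = {1.0}, oa[] = {2.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   The six arrays must remain valid until A is destroyed, since they are used in place rather
   than copied.
*/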
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;
    /* The variable below is only used in the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all
       cases because we cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a; /* refresh ba before computing ap2, since MatDisAssemble_MPIAIJ() replaced the arrays */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}