1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/vecscatterimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 67 { 68 PetscErrorCode ierr; 69 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 70 71 PetscFunctionBegin; 72 if (mat->A) { 73 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 74 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 75 } 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 80 { 81 PetscErrorCode ierr; 82 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 84 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 85 const PetscInt *ia,*ib; 86 const MatScalar *aa,*bb; 87 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 88 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 89 90 PetscFunctionBegin; 91 *keptrows = 0; 92 ia = a->i; 93 ib = b->i; 94 for (i=0; i<m; i++) { 95 na = ia[i+1] - ia[i]; 96 nb = ib[i+1] - ib[i]; 97 if (!na && !nb) { 98 cnt++; 99 goto ok1; 100 } 101 aa = a->a + ia[i]; 102 for (j=0; j<na; j++) { 103 if (aa[j] != 0.0) goto ok1; 104 } 105 bb = b->a + ib[i]; 106 for (j=0; j <nb; j++) { 107 if (bb[j] != 0.0) goto ok1; 108 } 109 cnt++; 110 ok1:; 111 } 112 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if (type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 395 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 396 ad = Ad->a; 397 ao = Ao->a; 398 if (mat->rmap->n) { 399 i = 0; 400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 402 } 403 for (i=1; i<mat->rmap->n; i++) { 404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 406 } 407 i--; 408 if (mat->rmap->n) { 409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 410 } 411 if (rank) { 412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 413 } 414 } 415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Local utility routine that creates a mapping from the global column 422 number to the local number in the off-diagonal part of the local 423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 424 a slightly higher hash table cost; without it it is not scalable (each processor 425 has an order N integer array but is fast to acess. 426 */ 427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 428 { 429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 430 PetscErrorCode ierr; 431 PetscInt n = aij->B->cmap->n,i; 432 433 PetscFunctionBegin; 434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 435 #if defined(PETSC_USE_CTABLE) 436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 437 for (i=0; i<n; i++) { 438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 439 } 440 #else 441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 444 #endif 445 PetscFunctionReturn(0); 446 } 447 448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 449 { \ 450 if (col <= lastcol1) low1 = 0; \ 451 else high1 = nrow1; \ 452 lastcol1 = col;\ 453 while (high1-low1 > 5) { \ 454 t = (low1+high1)/2; \ 455 if (rp1[t] > col) high1 = t; \ 456 else low1 = t; \ 457 } \ 458 for (_i=low1; _i<high1; _i++) { \ 459 if (rp1[_i] > col) break; \ 460 if (rp1[_i] == col) { \ 461 if (addv == ADD_VALUES) { \ 462 ap1[_i] += value; \ 463 /* Not sure LogFlops will slow dow the code or not */ \ 464 (void)PetscLogFlops(1.0); \ 465 } \ 466 else ap1[_i] = value; \ 467 inserted = PETSC_TRUE; \ 468 goto a_noinsert; \ 469 } \ 470 } \ 471 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 472 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 473 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 474 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 475 N = nrow1++ - 1; a->nz++; high1++; \ 476 /* shift up all the later entries in this row */ \ 477 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 478 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 479 rp1[_i] = col; \ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 #if defined(PETSC_USE_DEBUG) 583 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 584 #endif 585 if (im[i] >= rstart && im[i] < rend) { 586 row = im[i] - rstart; 587 lastcol1 = -1; 588 rp1 = aj + ai[row]; 589 ap1 = aa + ai[row]; 590 rmax1 = aimax[row]; 591 nrow1 = ailen[row]; 592 low1 = 0; 593 high1 = nrow1; 594 lastcol2 = -1; 595 rp2 = bj + bi[row]; 596 ap2 = ba + bi[row]; 597 rmax2 = bimax[row]; 598 nrow2 = bilen[row]; 599 low2 = 0; 600 high2 = nrow2; 601 602 for (j=0; j<n; j++) { 603 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 604 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 605 if (in[j] >= cstart && in[j] < cend) { 606 col = in[j] - cstart; 607 nonew = a->nonew; 608 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 609 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 610 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 611 #endif 612 } else if (in[j] < 0) continue; 613 #if defined(PETSC_USE_DEBUG) 614 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 615 #endif 616 else { 617 if (mat->was_assembled) { 618 if (!aij->colmap) { 619 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 620 } 621 #if defined(PETSC_USE_CTABLE) 622 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 623 col--; 624 #else 625 col = aij->colmap[in[j]] - 1; 626 #endif 627 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 628 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 629 col = in[j]; 630 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 631 B = aij->B; 632 b = (Mat_SeqAIJ*)B->data; 633 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 634 rp2 = bj + bi[row]; 635 ap2 = ba + bi[row]; 636 rmax2 = bimax[row]; 637 nrow2 = bilen[row]; 638 low2 = 0; 639 high2 = nrow2; 640 bm = aij->B->rmap->n; 641 ba = b->a; 642 inserted = PETSC_FALSE; 643 } else if (col < 0) { 644 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 645 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 646 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 647 } 648 } else col = in[j]; 649 nonew = b->nonew; 650 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 651 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 652 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 653 #endif 654 } 655 } 656 } else { 657 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 658 if (!aij->donotstash) { 659 mat->assembled = PETSC_FALSE; 660 if (roworiented) { 661 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 662 } else { 663 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 664 } 665 } 666 } 667 } 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 673 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 674 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 675 */ 676 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 677 { 678 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 679 Mat A = aij->A; /* diagonal part of the matrix */ 680 Mat B = aij->B; /* offdiagonal part of the matrix */ 681 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 682 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 683 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 684 PetscInt *ailen = a->ilen,*aj = a->j; 685 PetscInt *bilen = b->ilen,*bj = b->j; 686 PetscInt am = aij->A->rmap->n,j; 687 PetscInt diag_so_far = 0,dnz; 688 PetscInt offd_so_far = 0,onz; 689 690 PetscFunctionBegin; 691 /* Iterate over all rows of the matrix */ 692 for (j=0; j<am; j++) { 693 dnz = onz = 0; 694 /* Iterate over all non-zero columns of the current row */ 695 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 696 /* If column is in the diagonal */ 697 if (mat_j[col] >= cstart && mat_j[col] < cend) { 698 aj[diag_so_far++] = mat_j[col] - cstart; 699 dnz++; 700 } else { /* off-diagonal entries */ 701 bj[offd_so_far++] = mat_j[col]; 702 onz++; 703 } 704 } 705 ailen[j] = dnz; 706 bilen[j] = onz; 707 } 708 PetscFunctionReturn(0); 709 } 710 711 /* 712 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 713 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 714 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 715 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 716 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 717 */ 718 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 Mat A = aij->A; /* diagonal part of the matrix */ 722 Mat B = aij->B; /* offdiagonal part of the matrix */ 723 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 724 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 725 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 726 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 727 PetscInt *ailen = a->ilen,*aj = a->j; 728 PetscInt *bilen = b->ilen,*bj = b->j; 729 PetscInt am = aij->A->rmap->n,j; 730 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 731 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 732 PetscScalar *aa = a->a,*ba = b->a; 733 734 PetscFunctionBegin; 735 /* Iterate over all rows of the matrix */ 736 for (j=0; j<am; j++) { 737 dnz_row = onz_row = 0; 738 rowstart_offd = full_offd_i[j]; 739 rowstart_diag = full_diag_i[j]; 740 /* Iterate over all non-zero columns of the current row */ 741 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 742 /* If column is in the diagonal */ 743 if (mat_j[col] >= cstart && mat_j[col] < cend) { 744 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 745 aa[rowstart_diag+dnz_row] = mat_a[col]; 746 dnz_row++; 747 } else { /* off-diagonal entries */ 748 bj[rowstart_offd+onz_row] = mat_j[col]; 749 ba[rowstart_offd+onz_row] = mat_a[col]; 750 onz_row++; 751 } 752 } 753 ailen[j] = dnz_row; 754 bilen[j] = onz_row; 755 } 756 PetscFunctionReturn(0); 757 } 758 759 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 760 { 761 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 762 PetscErrorCode ierr; 763 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 764 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 765 766 PetscFunctionBegin; 767 for (i=0; i<m; i++) { 768 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 769 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 770 if (idxm[i] >= rstart && idxm[i] < rend) { 771 row = idxm[i] - rstart; 772 for (j=0; j<n; j++) { 773 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 774 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 775 if (idxn[j] >= cstart && idxn[j] < cend) { 776 col = idxn[j] - cstart; 777 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 778 } else { 779 if (!aij->colmap) { 780 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 781 } 782 #if defined(PETSC_USE_CTABLE) 783 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 784 col--; 785 #else 786 col = aij->colmap[idxn[j]] - 1; 787 #endif 788 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 789 else { 790 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 791 } 792 } 793 } 794 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 795 } 796 PetscFunctionReturn(0); 797 } 798 799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 800 801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 804 PetscErrorCode ierr; 805 PetscInt nstash,reallocs; 806 807 PetscFunctionBegin; 808 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 809 810 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 811 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 812 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 813 PetscFunctionReturn(0); 814 } 815 816 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 817 { 818 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 819 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 820 PetscErrorCode ierr; 821 PetscMPIInt n; 822 PetscInt i,j,rstart,ncols,flg; 823 PetscInt *row,*col; 824 PetscBool other_disassembled; 825 PetscScalar *val; 826 827 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 828 829 PetscFunctionBegin; 830 if (!aij->donotstash && !mat->nooffprocentries) { 831 while (1) { 832 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 833 if (!flg) break; 834 835 for (i=0; i<n; ) { 836 /* Now identify the consecutive vals belonging to the same row */ 837 for (j=i,rstart=row[j]; j<n; j++) { 838 if (row[j] != rstart) break; 839 } 840 if (j < n) ncols = j-i; 841 else ncols = n-i; 842 /* Now assemble all these values with a single function call */ 843 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 844 i = j; 845 } 846 } 847 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 848 } 849 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 850 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 851 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 852 if (mat->boundtocpu) { 853 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 854 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 855 } 856 #endif 857 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 858 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 859 860 /* determine if any processor has disassembled, if so we must 861 also disassemble ourself, in order that we may reassemble. */ 862 /* 863 if nonzero structure of submatrix B cannot change then we know that 864 no processor disassembled thus we can skip this stuff 865 */ 866 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 867 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 868 if (mat->was_assembled && !other_disassembled) { 869 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 870 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 871 #endif 872 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 873 } 874 } 875 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 876 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 877 } 878 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 879 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 880 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 881 #endif 882 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 883 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 884 885 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 886 887 aij->rowvalues = 0; 888 889 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 890 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 891 892 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 893 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 894 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 895 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 896 } 897 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 898 mat->offloadmask = PETSC_OFFLOAD_BOTH; 899 #endif 900 PetscFunctionReturn(0); 901 } 902 903 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 904 { 905 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 906 PetscErrorCode ierr; 907 908 PetscFunctionBegin; 909 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 910 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } 913 914 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 915 { 916 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 917 PetscObjectState sA, sB; 918 PetscInt *lrows; 919 PetscInt r, len; 920 PetscBool cong, lch, gch; 921 PetscErrorCode ierr; 922 923 PetscFunctionBegin; 924 /* get locally owned rows */ 925 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 926 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 927 /* fix right hand side if needed */ 928 if (x && b) { 929 const PetscScalar *xx; 930 PetscScalar *bb; 931 932 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 933 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 934 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 935 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 936 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 937 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 938 } 939 940 sA = mat->A->nonzerostate; 941 sB = mat->B->nonzerostate; 942 943 if (diag != 0.0 && cong) { 944 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 945 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 946 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 947 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 948 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 949 PetscInt nnwA, nnwB; 950 PetscBool nnzA, nnzB; 951 952 nnwA = aijA->nonew; 953 nnwB = aijB->nonew; 954 nnzA = aijA->keepnonzeropattern; 955 nnzB = aijB->keepnonzeropattern; 956 if (!nnzA) { 957 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 958 aijA->nonew = 0; 959 } 960 if (!nnzB) { 961 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 962 aijB->nonew = 0; 963 } 964 /* Must zero here before the next loop */ 965 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 966 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 967 for (r = 0; r < len; ++r) { 968 const PetscInt row = lrows[r] + A->rmap->rstart; 969 if (row >= A->cmap->N) continue; 970 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 971 } 972 aijA->nonew = nnwA; 973 aijB->nonew = nnwB; 974 } else { 975 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 976 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 977 } 978 ierr = PetscFree(lrows);CHKERRQ(ierr); 979 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 980 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 982 /* reduce nonzerostate */ 983 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 984 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 985 if (gch) A->nonzerostate++; 986 PetscFunctionReturn(0); 987 } 988 989 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 990 { 991 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 992 PetscErrorCode ierr; 993 PetscMPIInt n = A->rmap->n; 994 PetscInt i,j,r,m,len = 0; 995 PetscInt *lrows,*owners = A->rmap->range; 996 PetscMPIInt p = 0; 997 PetscSFNode *rrows; 998 PetscSF sf; 999 const PetscScalar *xx; 1000 PetscScalar *bb,*mask; 1001 Vec xmask,lmask; 1002 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 1003 const PetscInt *aj, *ii,*ridx; 1004 PetscScalar *aa; 1005 1006 PetscFunctionBegin; 1007 /* Create SF where leaves are input rows and roots are owned rows */ 1008 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1009 for (r = 0; r < n; ++r) lrows[r] = -1; 1010 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1011 for (r = 0; r < N; ++r) { 1012 const PetscInt idx = rows[r]; 1013 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1014 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1015 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1016 } 1017 rrows[r].rank = p; 1018 rrows[r].index = rows[r] - owners[p]; 1019 } 1020 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1021 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1022 /* Collect flags for rows to be zeroed */ 1023 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1024 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1025 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1026 /* Compress and put in row numbers */ 1027 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1028 /* zero diagonal part of matrix */ 1029 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1030 /* handle off diagonal part of matrix */ 1031 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1032 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1033 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1034 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1035 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1036 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1039 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1040 PetscBool cong; 1041 1042 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1043 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1044 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1045 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1046 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1047 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1048 } 1049 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1050 /* remove zeroed rows of off diagonal matrix */ 1051 ii = aij->i; 1052 for (i=0; i<len; i++) { 1053 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1054 } 1055 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1056 if (aij->compressedrow.use) { 1057 m = aij->compressedrow.nrows; 1058 ii = aij->compressedrow.i; 1059 ridx = aij->compressedrow.rindex; 1060 for (i=0; i<m; i++) { 1061 n = ii[i+1] - ii[i]; 1062 aj = aij->j + ii[i]; 1063 aa = aij->a + ii[i]; 1064 1065 for (j=0; j<n; j++) { 1066 if (PetscAbsScalar(mask[*aj])) { 1067 if (b) bb[*ridx] -= *aa*xx[*aj]; 1068 *aa = 0.0; 1069 } 1070 aa++; 1071 aj++; 1072 } 1073 ridx++; 1074 } 1075 } else { /* do not use compressed row format */ 1076 m = l->B->rmap->n; 1077 for (i=0; i<m; i++) { 1078 n = ii[i+1] - ii[i]; 1079 aj = aij->j + ii[i]; 1080 aa = aij->a + ii[i]; 1081 for (j=0; j<n; j++) { 1082 if (PetscAbsScalar(mask[*aj])) { 1083 if (b) bb[i] -= *aa*xx[*aj]; 1084 *aa = 0.0; 1085 } 1086 aa++; 1087 aj++; 1088 } 1089 } 1090 } 1091 if (x && b) { 1092 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1093 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1094 } 1095 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1096 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1097 ierr = PetscFree(lrows);CHKERRQ(ierr); 1098 1099 /* only change matrix nonzero state if pattern was allowed to be changed */ 1100 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1101 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1102 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1103 } 1104 PetscFunctionReturn(0); 1105 } 1106 1107 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1108 { 1109 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1110 PetscErrorCode ierr; 1111 PetscInt nt; 1112 VecScatter Mvctx = a->Mvctx; 1113 1114 PetscFunctionBegin; 1115 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1116 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1117 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1118 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1119 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1125 { 1126 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1135 { 1136 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1137 PetscErrorCode ierr; 1138 VecScatter Mvctx = a->Mvctx; 1139 1140 PetscFunctionBegin; 1141 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1142 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1143 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1144 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1145 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1150 { 1151 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1152 PetscErrorCode ierr; 1153 1154 PetscFunctionBegin; 1155 /* do nondiagonal part */ 1156 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 /* do local part */ 1158 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1159 /* add partial results together */ 1160 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1161 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1162 PetscFunctionReturn(0); 1163 } 1164 1165 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1166 { 1167 MPI_Comm comm; 1168 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1169 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1170 IS Me,Notme; 1171 PetscErrorCode ierr; 1172 PetscInt M,N,first,last,*notme,i; 1173 PetscBool lf; 1174 PetscMPIInt size; 1175 1176 PetscFunctionBegin; 1177 /* Easy test: symmetric diagonal block */ 1178 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1179 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1180 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1181 if (!*f) PetscFunctionReturn(0); 1182 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1183 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1184 if (size == 1) PetscFunctionReturn(0); 1185 1186 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1187 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1188 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1189 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1190 for (i=0; i<first; i++) notme[i] = i; 1191 for (i=last; i<M; i++) notme[i-last+first] = i; 1192 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1193 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1194 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1195 Aoff = Aoffs[0]; 1196 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1197 Boff = Boffs[0]; 1198 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1199 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1200 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1201 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1202 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1203 ierr = PetscFree(notme);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1208 { 1209 PetscErrorCode ierr; 1210 1211 PetscFunctionBegin; 1212 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1217 { 1218 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1219 PetscErrorCode ierr; 1220 1221 PetscFunctionBegin; 1222 /* do nondiagonal part */ 1223 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1224 /* do local part */ 1225 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1226 /* add partial results together */ 1227 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1228 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 /* 1233 This only works correctly for square matrices where the subblock A->A is the 1234 diagonal block 1235 */ 1236 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1237 { 1238 PetscErrorCode ierr; 1239 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1240 1241 PetscFunctionBegin; 1242 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1243 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1244 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1249 { 1250 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1251 PetscErrorCode ierr; 1252 1253 PetscFunctionBegin; 1254 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1255 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1260 { 1261 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1262 PetscErrorCode ierr; 1263 1264 PetscFunctionBegin; 1265 #if defined(PETSC_USE_LOG) 1266 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1267 #endif 1268 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1269 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1270 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1271 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1272 #if defined(PETSC_USE_CTABLE) 1273 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1274 #else 1275 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1276 #endif 1277 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1278 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1279 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1280 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1281 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1282 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1283 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1284 1285 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1295 #if defined(PETSC_HAVE_ELEMENTAL) 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1297 #endif 1298 #if defined(PETSC_HAVE_HYPRE) 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1301 #endif 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1309 { 1310 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1311 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1312 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1313 const PetscInt *garray = aij->garray; 1314 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1315 PetscInt *rowlens; 1316 PetscInt *colidxs; 1317 PetscScalar *matvals; 1318 PetscErrorCode ierr; 1319 1320 PetscFunctionBegin; 1321 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1322 1323 M = mat->rmap->N; 1324 N = mat->cmap->N; 1325 m = mat->rmap->n; 1326 rs = mat->rmap->rstart; 1327 cs = mat->cmap->rstart; 1328 nz = A->nz + B->nz; 1329 1330 /* write matrix header */ 1331 header[0] = MAT_FILE_CLASSID; 1332 header[1] = M; header[2] = N; header[3] = nz; 1333 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1334 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1335 1336 /* fill in and store row lengths */ 1337 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1338 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1339 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1340 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1341 1342 /* fill in and store column indices */ 1343 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1344 for (cnt=0, i=0; i<m; i++) { 1345 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1346 if (garray[B->j[jb]] > cs) break; 1347 colidxs[cnt++] = garray[B->j[jb]]; 1348 } 1349 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1350 colidxs[cnt++] = A->j[ja] + cs; 1351 for (; jb<B->i[i+1]; jb++) 1352 colidxs[cnt++] = garray[B->j[jb]]; 1353 } 1354 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1355 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1356 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1357 1358 /* fill in and store nonzero values */ 1359 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1360 for (cnt=0, i=0; i<m; i++) { 1361 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1362 if (garray[B->j[jb]] > cs) break; 1363 matvals[cnt++] = B->a[jb]; 1364 } 1365 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1366 matvals[cnt++] = A->a[ja]; 1367 for (; jb<B->i[i+1]; jb++) 1368 matvals[cnt++] = B->a[jb]; 1369 } 1370 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1371 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1372 ierr = PetscFree(matvals);CHKERRQ(ierr); 1373 1374 /* write block size option to the viewer's .info file */ 1375 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #include <petscdraw.h> 1380 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1381 { 1382 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1383 PetscErrorCode ierr; 1384 PetscMPIInt rank = aij->rank,size = aij->size; 1385 PetscBool isdraw,iascii,isbinary; 1386 PetscViewer sviewer; 1387 PetscViewerFormat format; 1388 1389 PetscFunctionBegin; 1390 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1391 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1392 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1393 if (iascii) { 1394 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1395 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1396 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1397 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1398 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1399 for (i=0; i<(PetscInt)size; i++) { 1400 nmax = PetscMax(nmax,nz[i]); 1401 nmin = PetscMin(nmin,nz[i]); 1402 navg += nz[i]; 1403 } 1404 ierr = PetscFree(nz);CHKERRQ(ierr); 1405 navg = navg/size; 1406 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1410 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1411 MatInfo info; 1412 PetscBool inodes; 1413 1414 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1415 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1416 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1418 if (!inodes) { 1419 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1420 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1421 } else { 1422 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1423 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1424 } 1425 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1426 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1427 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1429 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1430 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1431 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1432 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1435 PetscInt inodecount,inodelimit,*inodes; 1436 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1437 if (inodes) { 1438 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1439 } else { 1440 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1441 } 1442 PetscFunctionReturn(0); 1443 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1444 PetscFunctionReturn(0); 1445 } 1446 } else if (isbinary) { 1447 if (size == 1) { 1448 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1449 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1450 } else { 1451 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1452 } 1453 PetscFunctionReturn(0); 1454 } else if (iascii && size == 1) { 1455 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1456 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1457 PetscFunctionReturn(0); 1458 } else if (isdraw) { 1459 PetscDraw draw; 1460 PetscBool isnull; 1461 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1462 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1463 if (isnull) PetscFunctionReturn(0); 1464 } 1465 1466 { /* assemble the entire matrix onto first processor */ 1467 Mat A = NULL, Av; 1468 IS isrow,iscol; 1469 1470 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1471 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1472 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1473 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1474 /* The commented code uses MatCreateSubMatrices instead */ 1475 /* 1476 Mat *AA, A = NULL, Av; 1477 IS isrow,iscol; 1478 1479 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1480 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1481 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1482 if (!rank) { 1483 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1484 A = AA[0]; 1485 Av = AA[0]; 1486 } 1487 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1488 */ 1489 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1490 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1491 /* 1492 Everyone has to call to draw the matrix since the graphics waits are 1493 synchronized across all processors that share the PetscDraw object 1494 */ 1495 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1496 if (!rank) { 1497 if (((PetscObject)mat)->name) { 1498 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 } 1500 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1501 } 1502 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1503 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1504 ierr = MatDestroy(&A);CHKERRQ(ierr); 1505 } 1506 PetscFunctionReturn(0); 1507 } 1508 1509 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1510 { 1511 PetscErrorCode ierr; 1512 PetscBool iascii,isdraw,issocket,isbinary; 1513 1514 PetscFunctionBegin; 1515 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1516 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1519 if (iascii || isdraw || isbinary || issocket) { 1520 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1521 } 1522 PetscFunctionReturn(0); 1523 } 1524 1525 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1526 { 1527 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1528 PetscErrorCode ierr; 1529 Vec bb1 = 0; 1530 PetscBool hasop; 1531 1532 PetscFunctionBegin; 1533 if (flag == SOR_APPLY_UPPER) { 1534 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1535 PetscFunctionReturn(0); 1536 } 1537 1538 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1539 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1540 } 1541 1542 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1543 if (flag & SOR_ZERO_INITIAL_GUESS) { 1544 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1545 its--; 1546 } 1547 1548 while (its--) { 1549 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1550 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1551 1552 /* update rhs: bb1 = bb - B*x */ 1553 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1554 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1555 1556 /* local sweep */ 1557 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1558 } 1559 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1560 if (flag & SOR_ZERO_INITIAL_GUESS) { 1561 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1562 its--; 1563 } 1564 while (its--) { 1565 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 1568 /* update rhs: bb1 = bb - B*x */ 1569 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1570 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1571 1572 /* local sweep */ 1573 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1574 } 1575 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1576 if (flag & SOR_ZERO_INITIAL_GUESS) { 1577 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1578 its--; 1579 } 1580 while (its--) { 1581 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1582 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1583 1584 /* update rhs: bb1 = bb - B*x */ 1585 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1586 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1587 1588 /* local sweep */ 1589 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1590 } 1591 } else if (flag & SOR_EISENSTAT) { 1592 Vec xx1; 1593 1594 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1595 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1596 1597 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1598 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1599 if (!mat->diag) { 1600 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1601 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1602 } 1603 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1604 if (hasop) { 1605 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1606 } else { 1607 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1608 } 1609 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1610 1611 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1612 1613 /* local sweep */ 1614 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1615 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1616 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1617 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1618 1619 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1620 1621 matin->factorerrortype = mat->A->factorerrortype; 1622 PetscFunctionReturn(0); 1623 } 1624 1625 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1626 { 1627 Mat aA,aB,Aperm; 1628 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1629 PetscScalar *aa,*ba; 1630 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1631 PetscSF rowsf,sf; 1632 IS parcolp = NULL; 1633 PetscBool done; 1634 PetscErrorCode ierr; 1635 1636 PetscFunctionBegin; 1637 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1638 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1639 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1640 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1641 1642 /* Invert row permutation to find out where my rows should go */ 1643 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1644 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1645 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1646 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1647 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1648 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1649 1650 /* Invert column permutation to find out where my columns should go */ 1651 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1652 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1653 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1654 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1655 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1656 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1657 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1658 1659 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1660 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1661 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1662 1663 /* Find out where my gcols should go */ 1664 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1665 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1666 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1667 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1668 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1669 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1670 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1671 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1672 1673 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1674 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1675 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1676 for (i=0; i<m; i++) { 1677 PetscInt row = rdest[i]; 1678 PetscMPIInt rowner; 1679 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1680 for (j=ai[i]; j<ai[i+1]; j++) { 1681 PetscInt col = cdest[aj[j]]; 1682 PetscMPIInt cowner; 1683 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1684 if (rowner == cowner) dnnz[i]++; 1685 else onnz[i]++; 1686 } 1687 for (j=bi[i]; j<bi[i+1]; j++) { 1688 PetscInt col = gcdest[bj[j]]; 1689 PetscMPIInt cowner; 1690 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1691 if (rowner == cowner) dnnz[i]++; 1692 else onnz[i]++; 1693 } 1694 } 1695 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1696 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1699 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1700 1701 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1702 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1703 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1704 for (i=0; i<m; i++) { 1705 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1706 PetscInt j0,rowlen; 1707 rowlen = ai[i+1] - ai[i]; 1708 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1709 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1710 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1711 } 1712 rowlen = bi[i+1] - bi[i]; 1713 for (j0=j=0; j<rowlen; j0=j) { 1714 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1715 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1716 } 1717 } 1718 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1719 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1720 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1721 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1722 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1723 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1724 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1725 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1726 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1727 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1728 *B = Aperm; 1729 PetscFunctionReturn(0); 1730 } 1731 1732 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1733 { 1734 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1739 if (ghosts) *ghosts = aij->garray; 1740 PetscFunctionReturn(0); 1741 } 1742 1743 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1744 { 1745 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1746 Mat A = mat->A,B = mat->B; 1747 PetscErrorCode ierr; 1748 PetscLogDouble isend[5],irecv[5]; 1749 1750 PetscFunctionBegin; 1751 info->block_size = 1.0; 1752 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1753 1754 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1755 isend[3] = info->memory; isend[4] = info->mallocs; 1756 1757 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1758 1759 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1760 isend[3] += info->memory; isend[4] += info->mallocs; 1761 if (flag == MAT_LOCAL) { 1762 info->nz_used = isend[0]; 1763 info->nz_allocated = isend[1]; 1764 info->nz_unneeded = isend[2]; 1765 info->memory = isend[3]; 1766 info->mallocs = isend[4]; 1767 } else if (flag == MAT_GLOBAL_MAX) { 1768 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1769 1770 info->nz_used = irecv[0]; 1771 info->nz_allocated = irecv[1]; 1772 info->nz_unneeded = irecv[2]; 1773 info->memory = irecv[3]; 1774 info->mallocs = irecv[4]; 1775 } else if (flag == MAT_GLOBAL_SUM) { 1776 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1777 1778 info->nz_used = irecv[0]; 1779 info->nz_allocated = irecv[1]; 1780 info->nz_unneeded = irecv[2]; 1781 info->memory = irecv[3]; 1782 info->mallocs = irecv[4]; 1783 } 1784 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1785 info->fill_ratio_needed = 0; 1786 info->factor_mallocs = 0; 1787 PetscFunctionReturn(0); 1788 } 1789 1790 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1791 { 1792 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1793 PetscErrorCode ierr; 1794 1795 PetscFunctionBegin; 1796 switch (op) { 1797 case MAT_NEW_NONZERO_LOCATIONS: 1798 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1799 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1800 case MAT_KEEP_NONZERO_PATTERN: 1801 case MAT_NEW_NONZERO_LOCATION_ERR: 1802 case MAT_USE_INODES: 1803 case MAT_IGNORE_ZERO_ENTRIES: 1804 MatCheckPreallocated(A,1); 1805 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1806 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1807 break; 1808 case MAT_ROW_ORIENTED: 1809 MatCheckPreallocated(A,1); 1810 a->roworiented = flg; 1811 1812 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1813 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1814 break; 1815 case MAT_NEW_DIAGONALS: 1816 case MAT_SORTED_FULL: 1817 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1818 break; 1819 case MAT_IGNORE_OFF_PROC_ENTRIES: 1820 a->donotstash = flg; 1821 break; 1822 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1823 case MAT_SPD: 1824 case MAT_SYMMETRIC: 1825 case MAT_STRUCTURALLY_SYMMETRIC: 1826 case MAT_HERMITIAN: 1827 case MAT_SYMMETRY_ETERNAL: 1828 break; 1829 case MAT_SUBMAT_SINGLEIS: 1830 A->submat_singleis = flg; 1831 break; 1832 case MAT_STRUCTURE_ONLY: 1833 /* The option is handled directly by MatSetOption() */ 1834 break; 1835 default: 1836 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1837 } 1838 PetscFunctionReturn(0); 1839 } 1840 1841 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1842 { 1843 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1844 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1845 PetscErrorCode ierr; 1846 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1847 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1848 PetscInt *cmap,*idx_p; 1849 1850 PetscFunctionBegin; 1851 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1852 mat->getrowactive = PETSC_TRUE; 1853 1854 if (!mat->rowvalues && (idx || v)) { 1855 /* 1856 allocate enough space to hold information from the longest row. 1857 */ 1858 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1859 PetscInt max = 1,tmp; 1860 for (i=0; i<matin->rmap->n; i++) { 1861 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1862 if (max < tmp) max = tmp; 1863 } 1864 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1865 } 1866 1867 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1868 lrow = row - rstart; 1869 1870 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1871 if (!v) {pvA = 0; pvB = 0;} 1872 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1873 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1874 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1875 nztot = nzA + nzB; 1876 1877 cmap = mat->garray; 1878 if (v || idx) { 1879 if (nztot) { 1880 /* Sort by increasing column numbers, assuming A and B already sorted */ 1881 PetscInt imark = -1; 1882 if (v) { 1883 *v = v_p = mat->rowvalues; 1884 for (i=0; i<nzB; i++) { 1885 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1886 else break; 1887 } 1888 imark = i; 1889 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1890 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1891 } 1892 if (idx) { 1893 *idx = idx_p = mat->rowindices; 1894 if (imark > -1) { 1895 for (i=0; i<imark; i++) { 1896 idx_p[i] = cmap[cworkB[i]]; 1897 } 1898 } else { 1899 for (i=0; i<nzB; i++) { 1900 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1901 else break; 1902 } 1903 imark = i; 1904 } 1905 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1906 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1907 } 1908 } else { 1909 if (idx) *idx = 0; 1910 if (v) *v = 0; 1911 } 1912 } 1913 *nz = nztot; 1914 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1915 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1916 PetscFunctionReturn(0); 1917 } 1918 1919 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1922 1923 PetscFunctionBegin; 1924 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1925 aij->getrowactive = PETSC_FALSE; 1926 PetscFunctionReturn(0); 1927 } 1928 1929 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1930 { 1931 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1932 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1933 PetscErrorCode ierr; 1934 PetscInt i,j,cstart = mat->cmap->rstart; 1935 PetscReal sum = 0.0; 1936 MatScalar *v; 1937 1938 PetscFunctionBegin; 1939 if (aij->size == 1) { 1940 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1941 } else { 1942 if (type == NORM_FROBENIUS) { 1943 v = amat->a; 1944 for (i=0; i<amat->nz; i++) { 1945 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1946 } 1947 v = bmat->a; 1948 for (i=0; i<bmat->nz; i++) { 1949 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1950 } 1951 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1952 *norm = PetscSqrtReal(*norm); 1953 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1954 } else if (type == NORM_1) { /* max column norm */ 1955 PetscReal *tmp,*tmp2; 1956 PetscInt *jj,*garray = aij->garray; 1957 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1958 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1959 *norm = 0.0; 1960 v = amat->a; jj = amat->j; 1961 for (j=0; j<amat->nz; j++) { 1962 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1963 } 1964 v = bmat->a; jj = bmat->j; 1965 for (j=0; j<bmat->nz; j++) { 1966 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1967 } 1968 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1969 for (j=0; j<mat->cmap->N; j++) { 1970 if (tmp2[j] > *norm) *norm = tmp2[j]; 1971 } 1972 ierr = PetscFree(tmp);CHKERRQ(ierr); 1973 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1974 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1975 } else if (type == NORM_INFINITY) { /* max row norm */ 1976 PetscReal ntemp = 0.0; 1977 for (j=0; j<aij->A->rmap->n; j++) { 1978 v = amat->a + amat->i[j]; 1979 sum = 0.0; 1980 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1981 sum += PetscAbsScalar(*v); v++; 1982 } 1983 v = bmat->a + bmat->i[j]; 1984 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1985 sum += PetscAbsScalar(*v); v++; 1986 } 1987 if (sum > ntemp) ntemp = sum; 1988 } 1989 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1990 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1991 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1992 } 1993 PetscFunctionReturn(0); 1994 } 1995 1996 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1997 { 1998 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1999 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2000 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2001 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2002 PetscErrorCode ierr; 2003 Mat B,A_diag,*B_diag; 2004 const MatScalar *array; 2005 2006 PetscFunctionBegin; 2007 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2008 ai = Aloc->i; aj = Aloc->j; 2009 bi = Bloc->i; bj = Bloc->j; 2010 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2011 PetscInt *d_nnz,*g_nnz,*o_nnz; 2012 PetscSFNode *oloc; 2013 PETSC_UNUSED PetscSF sf; 2014 2015 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2016 /* compute d_nnz for preallocation */ 2017 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2018 for (i=0; i<ai[ma]; i++) { 2019 d_nnz[aj[i]]++; 2020 } 2021 /* compute local off-diagonal contributions */ 2022 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2023 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2024 /* map those to global */ 2025 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2026 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2027 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2028 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2029 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2030 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2031 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2032 2033 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2034 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2035 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2036 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2037 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2038 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2039 } else { 2040 B = *matout; 2041 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2042 } 2043 2044 b = (Mat_MPIAIJ*)B->data; 2045 A_diag = a->A; 2046 B_diag = &b->A; 2047 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2048 A_diag_ncol = A_diag->cmap->N; 2049 B_diag_ilen = sub_B_diag->ilen; 2050 B_diag_i = sub_B_diag->i; 2051 2052 /* Set ilen for diagonal of B */ 2053 for (i=0; i<A_diag_ncol; i++) { 2054 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2055 } 2056 2057 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2058 very quickly (=without using MatSetValues), because all writes are local. */ 2059 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2060 2061 /* copy over the B part */ 2062 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2063 array = Bloc->a; 2064 row = A->rmap->rstart; 2065 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2066 cols_tmp = cols; 2067 for (i=0; i<mb; i++) { 2068 ncol = bi[i+1]-bi[i]; 2069 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2070 row++; 2071 array += ncol; cols_tmp += ncol; 2072 } 2073 ierr = PetscFree(cols);CHKERRQ(ierr); 2074 2075 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2076 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2077 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2078 *matout = B; 2079 } else { 2080 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2081 } 2082 PetscFunctionReturn(0); 2083 } 2084 2085 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2086 { 2087 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2088 Mat a = aij->A,b = aij->B; 2089 PetscErrorCode ierr; 2090 PetscInt s1,s2,s3; 2091 2092 PetscFunctionBegin; 2093 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2094 if (rr) { 2095 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2096 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2097 /* Overlap communication with computation. */ 2098 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2099 } 2100 if (ll) { 2101 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2102 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2103 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2104 } 2105 /* scale the diagonal block */ 2106 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2107 2108 if (rr) { 2109 /* Do a scatter end and then right scale the off-diagonal block */ 2110 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2111 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2112 } 2113 PetscFunctionReturn(0); 2114 } 2115 2116 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2117 { 2118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2119 PetscErrorCode ierr; 2120 2121 PetscFunctionBegin; 2122 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2127 { 2128 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2129 Mat a,b,c,d; 2130 PetscBool flg; 2131 PetscErrorCode ierr; 2132 2133 PetscFunctionBegin; 2134 a = matA->A; b = matA->B; 2135 c = matB->A; d = matB->B; 2136 2137 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2138 if (flg) { 2139 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2140 } 2141 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2146 { 2147 PetscErrorCode ierr; 2148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2149 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2150 2151 PetscFunctionBegin; 2152 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2153 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2154 /* because of the column compression in the off-processor part of the matrix a->B, 2155 the number of columns in a->B and b->B may be different, hence we cannot call 2156 the MatCopy() directly on the two parts. If need be, we can provide a more 2157 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2158 then copying the submatrices */ 2159 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2160 } else { 2161 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2162 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2163 } 2164 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2165 PetscFunctionReturn(0); 2166 } 2167 2168 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2169 { 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 /* 2178 Computes the number of nonzeros per row needed for preallocation when X and Y 2179 have different nonzero structure. 2180 */ 2181 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2182 { 2183 PetscInt i,j,k,nzx,nzy; 2184 2185 PetscFunctionBegin; 2186 /* Set the number of nonzeros in the new matrix */ 2187 for (i=0; i<m; i++) { 2188 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2189 nzx = xi[i+1] - xi[i]; 2190 nzy = yi[i+1] - yi[i]; 2191 nnz[i] = 0; 2192 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2193 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2194 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2195 nnz[i]++; 2196 } 2197 for (; k<nzy; k++) nnz[i]++; 2198 } 2199 PetscFunctionReturn(0); 2200 } 2201 2202 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2203 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2204 { 2205 PetscErrorCode ierr; 2206 PetscInt m = Y->rmap->N; 2207 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2208 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2209 2210 PetscFunctionBegin; 2211 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2212 PetscFunctionReturn(0); 2213 } 2214 2215 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2216 { 2217 PetscErrorCode ierr; 2218 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2219 PetscBLASInt bnz,one=1; 2220 Mat_SeqAIJ *x,*y; 2221 2222 PetscFunctionBegin; 2223 if (str == SAME_NONZERO_PATTERN) { 2224 PetscScalar alpha = a; 2225 x = (Mat_SeqAIJ*)xx->A->data; 2226 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2227 y = (Mat_SeqAIJ*)yy->A->data; 2228 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2229 x = (Mat_SeqAIJ*)xx->B->data; 2230 y = (Mat_SeqAIJ*)yy->B->data; 2231 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2232 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2233 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2234 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2235 will be updated */ 2236 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2237 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2238 Y->offloadmask = PETSC_OFFLOAD_CPU; 2239 } 2240 #endif 2241 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2242 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2243 } else { 2244 Mat B; 2245 PetscInt *nnz_d,*nnz_o; 2246 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2247 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2248 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2249 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2250 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2251 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2252 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2253 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2254 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2255 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2256 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2257 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2258 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2259 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2260 } 2261 PetscFunctionReturn(0); 2262 } 2263 2264 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2265 2266 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2267 { 2268 #if defined(PETSC_USE_COMPLEX) 2269 PetscErrorCode ierr; 2270 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2271 2272 PetscFunctionBegin; 2273 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2274 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2275 #else 2276 PetscFunctionBegin; 2277 #endif 2278 PetscFunctionReturn(0); 2279 } 2280 2281 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2282 { 2283 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2284 PetscErrorCode ierr; 2285 2286 PetscFunctionBegin; 2287 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2288 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2289 PetscFunctionReturn(0); 2290 } 2291 2292 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2293 { 2294 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2295 PetscErrorCode ierr; 2296 2297 PetscFunctionBegin; 2298 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2299 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2304 { 2305 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2306 PetscErrorCode ierr; 2307 PetscInt i,*idxb = 0; 2308 PetscScalar *va,*vb; 2309 Vec vtmp; 2310 2311 PetscFunctionBegin; 2312 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2313 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2314 if (idx) { 2315 for (i=0; i<A->rmap->n; i++) { 2316 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2317 } 2318 } 2319 2320 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2321 if (idx) { 2322 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2323 } 2324 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2325 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2326 2327 for (i=0; i<A->rmap->n; i++) { 2328 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2329 va[i] = vb[i]; 2330 if (idx) idx[i] = a->garray[idxb[i]]; 2331 } 2332 } 2333 2334 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2335 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2336 ierr = PetscFree(idxb);CHKERRQ(ierr); 2337 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2338 PetscFunctionReturn(0); 2339 } 2340 2341 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2342 { 2343 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2344 PetscErrorCode ierr; 2345 PetscInt i,*idxb = 0; 2346 PetscScalar *va,*vb; 2347 Vec vtmp; 2348 2349 PetscFunctionBegin; 2350 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2351 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2352 if (idx) { 2353 for (i=0; i<A->cmap->n; i++) { 2354 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2355 } 2356 } 2357 2358 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2359 if (idx) { 2360 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2361 } 2362 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2363 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2364 2365 for (i=0; i<A->rmap->n; i++) { 2366 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2367 va[i] = vb[i]; 2368 if (idx) idx[i] = a->garray[idxb[i]]; 2369 } 2370 } 2371 2372 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2373 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2374 ierr = PetscFree(idxb);CHKERRQ(ierr); 2375 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2380 { 2381 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2382 PetscInt n = A->rmap->n; 2383 PetscInt cstart = A->cmap->rstart; 2384 PetscInt *cmap = mat->garray; 2385 PetscInt *diagIdx, *offdiagIdx; 2386 Vec diagV, offdiagV; 2387 PetscScalar *a, *diagA, *offdiagA; 2388 PetscInt r; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2393 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2394 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2395 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2396 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2397 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2398 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2399 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2400 for (r = 0; r < n; ++r) { 2401 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2402 a[r] = diagA[r]; 2403 idx[r] = cstart + diagIdx[r]; 2404 } else { 2405 a[r] = offdiagA[r]; 2406 idx[r] = cmap[offdiagIdx[r]]; 2407 } 2408 } 2409 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2410 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2411 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2412 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2413 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2414 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2415 PetscFunctionReturn(0); 2416 } 2417 2418 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2419 { 2420 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2421 PetscInt n = A->rmap->n; 2422 PetscInt cstart = A->cmap->rstart; 2423 PetscInt *cmap = mat->garray; 2424 PetscInt *diagIdx, *offdiagIdx; 2425 Vec diagV, offdiagV; 2426 PetscScalar *a, *diagA, *offdiagA; 2427 PetscInt r; 2428 PetscErrorCode ierr; 2429 2430 PetscFunctionBegin; 2431 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2432 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2433 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2434 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2435 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2436 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2437 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2438 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2439 for (r = 0; r < n; ++r) { 2440 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2441 a[r] = diagA[r]; 2442 idx[r] = cstart + diagIdx[r]; 2443 } else { 2444 a[r] = offdiagA[r]; 2445 idx[r] = cmap[offdiagIdx[r]]; 2446 } 2447 } 2448 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2449 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2450 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2451 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2452 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2453 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2458 { 2459 PetscErrorCode ierr; 2460 Mat *dummy; 2461 2462 PetscFunctionBegin; 2463 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2464 *newmat = *dummy; 2465 ierr = PetscFree(dummy);CHKERRQ(ierr); 2466 PetscFunctionReturn(0); 2467 } 2468 2469 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2470 { 2471 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2472 PetscErrorCode ierr; 2473 2474 PetscFunctionBegin; 2475 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2476 A->factorerrortype = a->A->factorerrortype; 2477 PetscFunctionReturn(0); 2478 } 2479 2480 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2481 { 2482 PetscErrorCode ierr; 2483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2484 2485 PetscFunctionBegin; 2486 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2487 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2488 if (x->assembled) { 2489 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2490 } else { 2491 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2492 } 2493 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2494 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2495 PetscFunctionReturn(0); 2496 } 2497 2498 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2499 { 2500 PetscFunctionBegin; 2501 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2502 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2503 PetscFunctionReturn(0); 2504 } 2505 2506 /*@ 2507 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2508 2509 Collective on Mat 2510 2511 Input Parameters: 2512 + A - the matrix 2513 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2514 2515 Level: advanced 2516 2517 @*/ 2518 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2519 { 2520 PetscErrorCode ierr; 2521 2522 PetscFunctionBegin; 2523 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2524 PetscFunctionReturn(0); 2525 } 2526 2527 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2528 { 2529 PetscErrorCode ierr; 2530 PetscBool sc = PETSC_FALSE,flg; 2531 2532 PetscFunctionBegin; 2533 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2534 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2535 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2536 if (flg) { 2537 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2538 } 2539 ierr = PetscOptionsTail();CHKERRQ(ierr); 2540 PetscFunctionReturn(0); 2541 } 2542 2543 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2544 { 2545 PetscErrorCode ierr; 2546 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2547 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2548 2549 PetscFunctionBegin; 2550 if (!Y->preallocated) { 2551 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2552 } else if (!aij->nz) { 2553 PetscInt nonew = aij->nonew; 2554 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2555 aij->nonew = nonew; 2556 } 2557 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2562 { 2563 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2564 PetscErrorCode ierr; 2565 2566 PetscFunctionBegin; 2567 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2568 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2569 if (d) { 2570 PetscInt rstart; 2571 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2572 *d += rstart; 2573 2574 } 2575 PetscFunctionReturn(0); 2576 } 2577 2578 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2579 { 2580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2581 PetscErrorCode ierr; 2582 2583 PetscFunctionBegin; 2584 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2585 PetscFunctionReturn(0); 2586 } 2587 2588 /* -------------------------------------------------------------------*/ 2589 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2590 MatGetRow_MPIAIJ, 2591 MatRestoreRow_MPIAIJ, 2592 MatMult_MPIAIJ, 2593 /* 4*/ MatMultAdd_MPIAIJ, 2594 MatMultTranspose_MPIAIJ, 2595 MatMultTransposeAdd_MPIAIJ, 2596 0, 2597 0, 2598 0, 2599 /*10*/ 0, 2600 0, 2601 0, 2602 MatSOR_MPIAIJ, 2603 MatTranspose_MPIAIJ, 2604 /*15*/ MatGetInfo_MPIAIJ, 2605 MatEqual_MPIAIJ, 2606 MatGetDiagonal_MPIAIJ, 2607 MatDiagonalScale_MPIAIJ, 2608 MatNorm_MPIAIJ, 2609 /*20*/ MatAssemblyBegin_MPIAIJ, 2610 MatAssemblyEnd_MPIAIJ, 2611 MatSetOption_MPIAIJ, 2612 MatZeroEntries_MPIAIJ, 2613 /*24*/ MatZeroRows_MPIAIJ, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*29*/ MatSetUp_MPIAIJ, 2619 0, 2620 0, 2621 MatGetDiagonalBlock_MPIAIJ, 2622 0, 2623 /*34*/ MatDuplicate_MPIAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /*39*/ MatAXPY_MPIAIJ, 2629 MatCreateSubMatrices_MPIAIJ, 2630 MatIncreaseOverlap_MPIAIJ, 2631 MatGetValues_MPIAIJ, 2632 MatCopy_MPIAIJ, 2633 /*44*/ MatGetRowMax_MPIAIJ, 2634 MatScale_MPIAIJ, 2635 MatShift_MPIAIJ, 2636 MatDiagonalSet_MPIAIJ, 2637 MatZeroRowsColumns_MPIAIJ, 2638 /*49*/ MatSetRandom_MPIAIJ, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /*54*/ MatFDColoringCreate_MPIXAIJ, 2644 0, 2645 MatSetUnfactored_MPIAIJ, 2646 MatPermute_MPIAIJ, 2647 0, 2648 /*59*/ MatCreateSubMatrix_MPIAIJ, 2649 MatDestroy_MPIAIJ, 2650 MatView_MPIAIJ, 2651 0, 2652 0, 2653 /*64*/ 0, 2654 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2655 0, 2656 0, 2657 0, 2658 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2659 MatGetRowMinAbs_MPIAIJ, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*75*/ MatFDColoringApply_AIJ, 2665 MatSetFromOptions_MPIAIJ, 2666 0, 2667 0, 2668 MatFindZeroDiagonals_MPIAIJ, 2669 /*80*/ 0, 2670 0, 2671 0, 2672 /*83*/ MatLoad_MPIAIJ, 2673 MatIsSymmetric_MPIAIJ, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*89*/ 0, 2679 0, 2680 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2681 0, 2682 0, 2683 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2684 0, 2685 0, 2686 0, 2687 MatBindToCPU_MPIAIJ, 2688 /*99*/ MatProductSetFromOptions_MPIAIJ, 2689 0, 2690 0, 2691 MatConjugate_MPIAIJ, 2692 0, 2693 /*104*/MatSetValuesRow_MPIAIJ, 2694 MatRealPart_MPIAIJ, 2695 MatImaginaryPart_MPIAIJ, 2696 0, 2697 0, 2698 /*109*/0, 2699 0, 2700 MatGetRowMin_MPIAIJ, 2701 0, 2702 MatMissingDiagonal_MPIAIJ, 2703 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2704 0, 2705 MatGetGhosts_MPIAIJ, 2706 0, 2707 0, 2708 /*119*/0, 2709 0, 2710 0, 2711 0, 2712 MatGetMultiProcBlock_MPIAIJ, 2713 /*124*/MatFindNonzeroRows_MPIAIJ, 2714 MatGetColumnNorms_MPIAIJ, 2715 MatInvertBlockDiagonal_MPIAIJ, 2716 MatInvertVariableBlockDiagonal_MPIAIJ, 2717 MatCreateSubMatricesMPI_MPIAIJ, 2718 /*129*/0, 2719 0, 2720 0, 2721 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2722 0, 2723 /*134*/0, 2724 0, 2725 0, 2726 0, 2727 0, 2728 /*139*/MatSetBlockSizes_MPIAIJ, 2729 0, 2730 0, 2731 MatFDColoringSetUp_MPIXAIJ, 2732 MatFindOffBlockDiagonalEntries_MPIAIJ, 2733 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2734 /*145*/0, 2735 0, 2736 0 2737 }; 2738 2739 /* ----------------------------------------------------------------------------------------*/ 2740 2741 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2742 { 2743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2748 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751 2752 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2753 { 2754 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2759 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2760 PetscFunctionReturn(0); 2761 } 2762 2763 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2764 { 2765 Mat_MPIAIJ *b; 2766 PetscErrorCode ierr; 2767 PetscMPIInt size; 2768 2769 PetscFunctionBegin; 2770 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2771 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2772 b = (Mat_MPIAIJ*)B->data; 2773 2774 #if defined(PETSC_USE_CTABLE) 2775 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2776 #else 2777 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2778 #endif 2779 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2780 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2781 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2782 2783 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2784 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2785 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2786 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2787 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2788 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2789 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2791 2792 if (!B->preallocated) { 2793 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2794 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2795 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2796 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2797 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2798 } 2799 2800 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2801 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2802 B->preallocated = PETSC_TRUE; 2803 B->was_assembled = PETSC_FALSE; 2804 B->assembled = PETSC_FALSE; 2805 PetscFunctionReturn(0); 2806 } 2807 2808 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2809 { 2810 Mat_MPIAIJ *b; 2811 PetscErrorCode ierr; 2812 2813 PetscFunctionBegin; 2814 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2815 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2816 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2817 b = (Mat_MPIAIJ*)B->data; 2818 2819 #if defined(PETSC_USE_CTABLE) 2820 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2821 #else 2822 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2823 #endif 2824 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2825 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2826 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2827 2828 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2829 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2830 B->preallocated = PETSC_TRUE; 2831 B->was_assembled = PETSC_FALSE; 2832 B->assembled = PETSC_FALSE; 2833 PetscFunctionReturn(0); 2834 } 2835 2836 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2837 { 2838 Mat mat; 2839 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2840 PetscErrorCode ierr; 2841 2842 PetscFunctionBegin; 2843 *newmat = 0; 2844 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2845 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2846 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2847 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2848 a = (Mat_MPIAIJ*)mat->data; 2849 2850 mat->factortype = matin->factortype; 2851 mat->assembled = matin->assembled; 2852 mat->insertmode = NOT_SET_VALUES; 2853 mat->preallocated = matin->preallocated; 2854 2855 a->size = oldmat->size; 2856 a->rank = oldmat->rank; 2857 a->donotstash = oldmat->donotstash; 2858 a->roworiented = oldmat->roworiented; 2859 a->rowindices = NULL; 2860 a->rowvalues = NULL; 2861 a->getrowactive = PETSC_FALSE; 2862 2863 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2864 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2865 2866 if (oldmat->colmap) { 2867 #if defined(PETSC_USE_CTABLE) 2868 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2869 #else 2870 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2871 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2872 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2873 #endif 2874 } else a->colmap = NULL; 2875 if (oldmat->garray) { 2876 PetscInt len; 2877 len = oldmat->B->cmap->n; 2878 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2879 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2880 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2881 } else a->garray = NULL; 2882 2883 /* It may happen MatDuplicate is called with a non-assembled matrix 2884 In fact, MatDuplicate only requires the matrix to be preallocated 2885 This may happen inside a DMCreateMatrix_Shell */ 2886 if (oldmat->lvec) { 2887 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2888 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2889 } 2890 if (oldmat->Mvctx) { 2891 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2892 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2893 } 2894 if (oldmat->Mvctx_mpi1) { 2895 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2896 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2897 } 2898 2899 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2900 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2901 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2902 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2903 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2904 *newmat = mat; 2905 PetscFunctionReturn(0); 2906 } 2907 2908 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2909 { 2910 PetscBool isbinary, ishdf5; 2911 PetscErrorCode ierr; 2912 2913 PetscFunctionBegin; 2914 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2915 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2916 /* force binary viewer to load .info file if it has not yet done so */ 2917 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2918 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2919 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2920 if (isbinary) { 2921 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2922 } else if (ishdf5) { 2923 #if defined(PETSC_HAVE_HDF5) 2924 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2925 #else 2926 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2927 #endif 2928 } else { 2929 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2930 } 2931 PetscFunctionReturn(0); 2932 } 2933 2934 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2935 { 2936 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2937 PetscInt *rowidxs,*colidxs; 2938 PetscScalar *matvals; 2939 PetscErrorCode ierr; 2940 2941 PetscFunctionBegin; 2942 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2943 2944 /* read in matrix header */ 2945 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2946 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 2947 M = header[1]; N = header[2]; nz = header[3]; 2948 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 2949 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 2950 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2951 2952 /* set block sizes from the viewer's .info file */ 2953 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 2954 /* set global sizes if not set already */ 2955 if (mat->rmap->N < 0) mat->rmap->N = M; 2956 if (mat->cmap->N < 0) mat->cmap->N = N; 2957 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 2958 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 2959 2960 /* check if the matrix sizes are correct */ 2961 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2962 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 2963 2964 /* read in row lengths and build row indices */ 2965 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 2966 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 2967 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 2968 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 2969 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 2970 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 2971 /* read in column indices and matrix values */ 2972 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 2973 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 2974 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 2975 /* store matrix indices and values */ 2976 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 2977 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 2978 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 2979 PetscFunctionReturn(0); 2980 } 2981 2982 /* Not scalable because of ISAllGather() unless getting all columns. */ 2983 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2984 { 2985 PetscErrorCode ierr; 2986 IS iscol_local; 2987 PetscBool isstride; 2988 PetscMPIInt lisstride=0,gisstride; 2989 2990 PetscFunctionBegin; 2991 /* check if we are grabbing all columns*/ 2992 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2993 2994 if (isstride) { 2995 PetscInt start,len,mstart,mlen; 2996 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2997 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2998 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 2999 if (mstart == start && mlen-mstart == len) lisstride = 1; 3000 } 3001 3002 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3003 if (gisstride) { 3004 PetscInt N; 3005 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3006 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3007 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3008 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3009 } else { 3010 PetscInt cbs; 3011 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3012 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3013 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3014 } 3015 3016 *isseq = iscol_local; 3017 PetscFunctionReturn(0); 3018 } 3019 3020 /* 3021 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3022 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3023 3024 Input Parameters: 3025 mat - matrix 3026 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3027 i.e., mat->rstart <= isrow[i] < mat->rend 3028 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3029 i.e., mat->cstart <= iscol[i] < mat->cend 3030 Output Parameter: 3031 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3032 iscol_o - sequential column index set for retrieving mat->B 3033 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3034 */ 3035 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3036 { 3037 PetscErrorCode ierr; 3038 Vec x,cmap; 3039 const PetscInt *is_idx; 3040 PetscScalar *xarray,*cmaparray; 3041 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3042 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3043 Mat B=a->B; 3044 Vec lvec=a->lvec,lcmap; 3045 PetscInt i,cstart,cend,Bn=B->cmap->N; 3046 MPI_Comm comm; 3047 VecScatter Mvctx=a->Mvctx; 3048 3049 PetscFunctionBegin; 3050 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3051 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3052 3053 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3054 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3055 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3056 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3057 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3058 3059 /* Get start indices */ 3060 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3061 isstart -= ncols; 3062 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3063 3064 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3065 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3066 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3067 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3068 for (i=0; i<ncols; i++) { 3069 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3070 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3071 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3072 } 3073 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3074 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3075 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3076 3077 /* Get iscol_d */ 3078 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3079 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3080 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3081 3082 /* Get isrow_d */ 3083 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3084 rstart = mat->rmap->rstart; 3085 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3086 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3087 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3088 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3089 3090 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3091 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3092 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3093 3094 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3095 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3096 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3097 3098 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3099 3100 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3101 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3102 3103 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3104 /* off-process column indices */ 3105 count = 0; 3106 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3107 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3108 3109 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3110 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3111 for (i=0; i<Bn; i++) { 3112 if (PetscRealPart(xarray[i]) > -1.0) { 3113 idx[count] = i; /* local column index in off-diagonal part B */ 3114 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3115 count++; 3116 } 3117 } 3118 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3119 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3120 3121 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3122 /* cannot ensure iscol_o has same blocksize as iscol! */ 3123 3124 ierr = PetscFree(idx);CHKERRQ(ierr); 3125 *garray = cmap1; 3126 3127 ierr = VecDestroy(&x);CHKERRQ(ierr); 3128 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3129 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3130 PetscFunctionReturn(0); 3131 } 3132 3133 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3134 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3135 { 3136 PetscErrorCode ierr; 3137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3138 Mat M = NULL; 3139 MPI_Comm comm; 3140 IS iscol_d,isrow_d,iscol_o; 3141 Mat Asub = NULL,Bsub = NULL; 3142 PetscInt n; 3143 3144 PetscFunctionBegin; 3145 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3146 3147 if (call == MAT_REUSE_MATRIX) { 3148 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3149 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3150 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3151 3152 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3153 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3154 3155 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3156 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3157 3158 /* Update diagonal and off-diagonal portions of submat */ 3159 asub = (Mat_MPIAIJ*)(*submat)->data; 3160 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3161 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3162 if (n) { 3163 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3164 } 3165 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3166 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3167 3168 } else { /* call == MAT_INITIAL_MATRIX) */ 3169 const PetscInt *garray; 3170 PetscInt BsubN; 3171 3172 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3173 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3174 3175 /* Create local submatrices Asub and Bsub */ 3176 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3177 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3178 3179 /* Create submatrix M */ 3180 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3181 3182 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3183 asub = (Mat_MPIAIJ*)M->data; 3184 3185 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3186 n = asub->B->cmap->N; 3187 if (BsubN > n) { 3188 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3189 const PetscInt *idx; 3190 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3191 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3192 3193 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3194 j = 0; 3195 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3196 for (i=0; i<n; i++) { 3197 if (j >= BsubN) break; 3198 while (subgarray[i] > garray[j]) j++; 3199 3200 if (subgarray[i] == garray[j]) { 3201 idx_new[i] = idx[j++]; 3202 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3203 } 3204 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3205 3206 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3207 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3208 3209 } else if (BsubN < n) { 3210 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3211 } 3212 3213 ierr = PetscFree(garray);CHKERRQ(ierr); 3214 *submat = M; 3215 3216 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3217 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3218 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3219 3220 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3221 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3222 3223 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3224 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3225 } 3226 PetscFunctionReturn(0); 3227 } 3228 3229 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3230 { 3231 PetscErrorCode ierr; 3232 IS iscol_local=NULL,isrow_d; 3233 PetscInt csize; 3234 PetscInt n,i,j,start,end; 3235 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3236 MPI_Comm comm; 3237 3238 PetscFunctionBegin; 3239 /* If isrow has same processor distribution as mat, 3240 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3241 if (call == MAT_REUSE_MATRIX) { 3242 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3243 if (isrow_d) { 3244 sameRowDist = PETSC_TRUE; 3245 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3246 } else { 3247 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3248 if (iscol_local) { 3249 sameRowDist = PETSC_TRUE; 3250 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3251 } 3252 } 3253 } else { 3254 /* Check if isrow has same processor distribution as mat */ 3255 sameDist[0] = PETSC_FALSE; 3256 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3257 if (!n) { 3258 sameDist[0] = PETSC_TRUE; 3259 } else { 3260 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3261 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3262 if (i >= start && j < end) { 3263 sameDist[0] = PETSC_TRUE; 3264 } 3265 } 3266 3267 /* Check if iscol has same processor distribution as mat */ 3268 sameDist[1] = PETSC_FALSE; 3269 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3270 if (!n) { 3271 sameDist[1] = PETSC_TRUE; 3272 } else { 3273 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3274 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3275 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3276 } 3277 3278 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3279 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3280 sameRowDist = tsameDist[0]; 3281 } 3282 3283 if (sameRowDist) { 3284 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3285 /* isrow and iscol have same processor distribution as mat */ 3286 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3287 PetscFunctionReturn(0); 3288 } else { /* sameRowDist */ 3289 /* isrow has same processor distribution as mat */ 3290 if (call == MAT_INITIAL_MATRIX) { 3291 PetscBool sorted; 3292 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3293 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3294 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3295 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3296 3297 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3298 if (sorted) { 3299 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3300 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3301 PetscFunctionReturn(0); 3302 } 3303 } else { /* call == MAT_REUSE_MATRIX */ 3304 IS iscol_sub; 3305 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3306 if (iscol_sub) { 3307 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3308 PetscFunctionReturn(0); 3309 } 3310 } 3311 } 3312 } 3313 3314 /* General case: iscol -> iscol_local which has global size of iscol */ 3315 if (call == MAT_REUSE_MATRIX) { 3316 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3317 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3318 } else { 3319 if (!iscol_local) { 3320 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3321 } 3322 } 3323 3324 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3325 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3326 3327 if (call == MAT_INITIAL_MATRIX) { 3328 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3329 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3330 } 3331 PetscFunctionReturn(0); 3332 } 3333 3334 /*@C 3335 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3336 and "off-diagonal" part of the matrix in CSR format. 3337 3338 Collective 3339 3340 Input Parameters: 3341 + comm - MPI communicator 3342 . A - "diagonal" portion of matrix 3343 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3344 - garray - global index of B columns 3345 3346 Output Parameter: 3347 . mat - the matrix, with input A as its local diagonal matrix 3348 Level: advanced 3349 3350 Notes: 3351 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3352 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3353 3354 .seealso: MatCreateMPIAIJWithSplitArrays() 3355 @*/ 3356 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3357 { 3358 PetscErrorCode ierr; 3359 Mat_MPIAIJ *maij; 3360 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3361 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3362 PetscScalar *oa=b->a; 3363 Mat Bnew; 3364 PetscInt m,n,N; 3365 3366 PetscFunctionBegin; 3367 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3368 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3369 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3370 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3371 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3372 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3373 3374 /* Get global columns of mat */ 3375 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3376 3377 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3378 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3379 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3380 maij = (Mat_MPIAIJ*)(*mat)->data; 3381 3382 (*mat)->preallocated = PETSC_TRUE; 3383 3384 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3385 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3386 3387 /* Set A as diagonal portion of *mat */ 3388 maij->A = A; 3389 3390 nz = oi[m]; 3391 for (i=0; i<nz; i++) { 3392 col = oj[i]; 3393 oj[i] = garray[col]; 3394 } 3395 3396 /* Set Bnew as off-diagonal portion of *mat */ 3397 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3398 bnew = (Mat_SeqAIJ*)Bnew->data; 3399 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3400 maij->B = Bnew; 3401 3402 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3403 3404 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3405 b->free_a = PETSC_FALSE; 3406 b->free_ij = PETSC_FALSE; 3407 ierr = MatDestroy(&B);CHKERRQ(ierr); 3408 3409 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3410 bnew->free_a = PETSC_TRUE; 3411 bnew->free_ij = PETSC_TRUE; 3412 3413 /* condense columns of maij->B */ 3414 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3415 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3416 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3417 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3418 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3419 PetscFunctionReturn(0); 3420 } 3421 3422 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3423 3424 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3425 { 3426 PetscErrorCode ierr; 3427 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3428 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3429 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3430 Mat M,Msub,B=a->B; 3431 MatScalar *aa; 3432 Mat_SeqAIJ *aij; 3433 PetscInt *garray = a->garray,*colsub,Ncols; 3434 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3435 IS iscol_sub,iscmap; 3436 const PetscInt *is_idx,*cmap; 3437 PetscBool allcolumns=PETSC_FALSE; 3438 MPI_Comm comm; 3439 3440 PetscFunctionBegin; 3441 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3442 3443 if (call == MAT_REUSE_MATRIX) { 3444 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3445 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3446 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3447 3448 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3449 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3450 3451 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3452 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3453 3454 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3455 3456 } else { /* call == MAT_INITIAL_MATRIX) */ 3457 PetscBool flg; 3458 3459 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3460 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3461 3462 /* (1) iscol -> nonscalable iscol_local */ 3463 /* Check for special case: each processor gets entire matrix columns */ 3464 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3465 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3466 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3467 if (allcolumns) { 3468 iscol_sub = iscol_local; 3469 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3470 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3471 3472 } else { 3473 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3474 PetscInt *idx,*cmap1,k; 3475 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3476 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3477 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3478 count = 0; 3479 k = 0; 3480 for (i=0; i<Ncols; i++) { 3481 j = is_idx[i]; 3482 if (j >= cstart && j < cend) { 3483 /* diagonal part of mat */ 3484 idx[count] = j; 3485 cmap1[count++] = i; /* column index in submat */ 3486 } else if (Bn) { 3487 /* off-diagonal part of mat */ 3488 if (j == garray[k]) { 3489 idx[count] = j; 3490 cmap1[count++] = i; /* column index in submat */ 3491 } else if (j > garray[k]) { 3492 while (j > garray[k] && k < Bn-1) k++; 3493 if (j == garray[k]) { 3494 idx[count] = j; 3495 cmap1[count++] = i; /* column index in submat */ 3496 } 3497 } 3498 } 3499 } 3500 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3501 3502 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3503 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3504 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3505 3506 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3507 } 3508 3509 /* (3) Create sequential Msub */ 3510 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3511 } 3512 3513 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3514 aij = (Mat_SeqAIJ*)(Msub)->data; 3515 ii = aij->i; 3516 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3517 3518 /* 3519 m - number of local rows 3520 Ncols - number of columns (same on all processors) 3521 rstart - first row in new global matrix generated 3522 */ 3523 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3524 3525 if (call == MAT_INITIAL_MATRIX) { 3526 /* (4) Create parallel newmat */ 3527 PetscMPIInt rank,size; 3528 PetscInt csize; 3529 3530 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3531 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3532 3533 /* 3534 Determine the number of non-zeros in the diagonal and off-diagonal 3535 portions of the matrix in order to do correct preallocation 3536 */ 3537 3538 /* first get start and end of "diagonal" columns */ 3539 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3540 if (csize == PETSC_DECIDE) { 3541 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3542 if (mglobal == Ncols) { /* square matrix */ 3543 nlocal = m; 3544 } else { 3545 nlocal = Ncols/size + ((Ncols % size) > rank); 3546 } 3547 } else { 3548 nlocal = csize; 3549 } 3550 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3551 rstart = rend - nlocal; 3552 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3553 3554 /* next, compute all the lengths */ 3555 jj = aij->j; 3556 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3557 olens = dlens + m; 3558 for (i=0; i<m; i++) { 3559 jend = ii[i+1] - ii[i]; 3560 olen = 0; 3561 dlen = 0; 3562 for (j=0; j<jend; j++) { 3563 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3564 else dlen++; 3565 jj++; 3566 } 3567 olens[i] = olen; 3568 dlens[i] = dlen; 3569 } 3570 3571 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3572 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3573 3574 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3575 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3576 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3577 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3578 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3579 ierr = PetscFree(dlens);CHKERRQ(ierr); 3580 3581 } else { /* call == MAT_REUSE_MATRIX */ 3582 M = *newmat; 3583 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3584 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3585 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3586 /* 3587 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3588 rather than the slower MatSetValues(). 3589 */ 3590 M->was_assembled = PETSC_TRUE; 3591 M->assembled = PETSC_FALSE; 3592 } 3593 3594 /* (5) Set values of Msub to *newmat */ 3595 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3596 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3597 3598 jj = aij->j; 3599 aa = aij->a; 3600 for (i=0; i<m; i++) { 3601 row = rstart + i; 3602 nz = ii[i+1] - ii[i]; 3603 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3604 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3605 jj += nz; aa += nz; 3606 } 3607 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3608 3609 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3610 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3611 3612 ierr = PetscFree(colsub);CHKERRQ(ierr); 3613 3614 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3615 if (call == MAT_INITIAL_MATRIX) { 3616 *newmat = M; 3617 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3618 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3619 3620 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3621 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3622 3623 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3624 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3625 3626 if (iscol_local) { 3627 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3628 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3629 } 3630 } 3631 PetscFunctionReturn(0); 3632 } 3633 3634 /* 3635 Not great since it makes two copies of the submatrix, first an SeqAIJ 3636 in local and then by concatenating the local matrices the end result. 3637 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3638 3639 Note: This requires a sequential iscol with all indices. 3640 */ 3641 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3642 { 3643 PetscErrorCode ierr; 3644 PetscMPIInt rank,size; 3645 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3646 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3647 Mat M,Mreuse; 3648 MatScalar *aa,*vwork; 3649 MPI_Comm comm; 3650 Mat_SeqAIJ *aij; 3651 PetscBool colflag,allcolumns=PETSC_FALSE; 3652 3653 PetscFunctionBegin; 3654 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3655 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3656 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3657 3658 /* Check for special case: each processor gets entire matrix columns */ 3659 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3660 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3661 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3662 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3663 3664 if (call == MAT_REUSE_MATRIX) { 3665 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3666 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3667 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3668 } else { 3669 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3670 } 3671 3672 /* 3673 m - number of local rows 3674 n - number of columns (same on all processors) 3675 rstart - first row in new global matrix generated 3676 */ 3677 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3678 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3679 if (call == MAT_INITIAL_MATRIX) { 3680 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3681 ii = aij->i; 3682 jj = aij->j; 3683 3684 /* 3685 Determine the number of non-zeros in the diagonal and off-diagonal 3686 portions of the matrix in order to do correct preallocation 3687 */ 3688 3689 /* first get start and end of "diagonal" columns */ 3690 if (csize == PETSC_DECIDE) { 3691 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3692 if (mglobal == n) { /* square matrix */ 3693 nlocal = m; 3694 } else { 3695 nlocal = n/size + ((n % size) > rank); 3696 } 3697 } else { 3698 nlocal = csize; 3699 } 3700 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3701 rstart = rend - nlocal; 3702 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3703 3704 /* next, compute all the lengths */ 3705 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3706 olens = dlens + m; 3707 for (i=0; i<m; i++) { 3708 jend = ii[i+1] - ii[i]; 3709 olen = 0; 3710 dlen = 0; 3711 for (j=0; j<jend; j++) { 3712 if (*jj < rstart || *jj >= rend) olen++; 3713 else dlen++; 3714 jj++; 3715 } 3716 olens[i] = olen; 3717 dlens[i] = dlen; 3718 } 3719 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3720 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3721 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3722 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3723 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3724 ierr = PetscFree(dlens);CHKERRQ(ierr); 3725 } else { 3726 PetscInt ml,nl; 3727 3728 M = *newmat; 3729 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3730 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3731 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3732 /* 3733 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3734 rather than the slower MatSetValues(). 3735 */ 3736 M->was_assembled = PETSC_TRUE; 3737 M->assembled = PETSC_FALSE; 3738 } 3739 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3740 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3741 ii = aij->i; 3742 jj = aij->j; 3743 aa = aij->a; 3744 for (i=0; i<m; i++) { 3745 row = rstart + i; 3746 nz = ii[i+1] - ii[i]; 3747 cwork = jj; jj += nz; 3748 vwork = aa; aa += nz; 3749 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3750 } 3751 3752 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3753 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3754 *newmat = M; 3755 3756 /* save submatrix used in processor for next request */ 3757 if (call == MAT_INITIAL_MATRIX) { 3758 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3759 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3760 } 3761 PetscFunctionReturn(0); 3762 } 3763 3764 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3765 { 3766 PetscInt m,cstart, cend,j,nnz,i,d; 3767 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3768 const PetscInt *JJ; 3769 PetscErrorCode ierr; 3770 PetscBool nooffprocentries; 3771 3772 PetscFunctionBegin; 3773 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3774 3775 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3776 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3777 m = B->rmap->n; 3778 cstart = B->cmap->rstart; 3779 cend = B->cmap->rend; 3780 rstart = B->rmap->rstart; 3781 3782 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3783 3784 #if defined(PETSC_USE_DEBUG) 3785 for (i=0; i<m; i++) { 3786 nnz = Ii[i+1]- Ii[i]; 3787 JJ = J + Ii[i]; 3788 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3789 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3790 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3791 } 3792 #endif 3793 3794 for (i=0; i<m; i++) { 3795 nnz = Ii[i+1]- Ii[i]; 3796 JJ = J + Ii[i]; 3797 nnz_max = PetscMax(nnz_max,nnz); 3798 d = 0; 3799 for (j=0; j<nnz; j++) { 3800 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3801 } 3802 d_nnz[i] = d; 3803 o_nnz[i] = nnz - d; 3804 } 3805 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3806 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3807 3808 for (i=0; i<m; i++) { 3809 ii = i + rstart; 3810 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3811 } 3812 nooffprocentries = B->nooffprocentries; 3813 B->nooffprocentries = PETSC_TRUE; 3814 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3815 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3816 B->nooffprocentries = nooffprocentries; 3817 3818 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3819 PetscFunctionReturn(0); 3820 } 3821 3822 /*@ 3823 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3824 (the default parallel PETSc format). 3825 3826 Collective 3827 3828 Input Parameters: 3829 + B - the matrix 3830 . i - the indices into j for the start of each local row (starts with zero) 3831 . j - the column indices for each local row (starts with zero) 3832 - v - optional values in the matrix 3833 3834 Level: developer 3835 3836 Notes: 3837 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3838 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3839 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3840 3841 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3842 3843 The format which is used for the sparse matrix input, is equivalent to a 3844 row-major ordering.. i.e for the following matrix, the input data expected is 3845 as shown 3846 3847 $ 1 0 0 3848 $ 2 0 3 P0 3849 $ ------- 3850 $ 4 5 6 P1 3851 $ 3852 $ Process0 [P0]: rows_owned=[0,1] 3853 $ i = {0,1,3} [size = nrow+1 = 2+1] 3854 $ j = {0,0,2} [size = 3] 3855 $ v = {1,2,3} [size = 3] 3856 $ 3857 $ Process1 [P1]: rows_owned=[2] 3858 $ i = {0,3} [size = nrow+1 = 1+1] 3859 $ j = {0,1,2} [size = 3] 3860 $ v = {4,5,6} [size = 3] 3861 3862 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3863 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3864 @*/ 3865 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3866 { 3867 PetscErrorCode ierr; 3868 3869 PetscFunctionBegin; 3870 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3871 PetscFunctionReturn(0); 3872 } 3873 3874 /*@C 3875 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3876 (the default parallel PETSc format). For good matrix assembly performance 3877 the user should preallocate the matrix storage by setting the parameters 3878 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3879 performance can be increased by more than a factor of 50. 3880 3881 Collective 3882 3883 Input Parameters: 3884 + B - the matrix 3885 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3886 (same value is used for all local rows) 3887 . d_nnz - array containing the number of nonzeros in the various rows of the 3888 DIAGONAL portion of the local submatrix (possibly different for each row) 3889 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3890 The size of this array is equal to the number of local rows, i.e 'm'. 3891 For matrices that will be factored, you must leave room for (and set) 3892 the diagonal entry even if it is zero. 3893 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3894 submatrix (same value is used for all local rows). 3895 - o_nnz - array containing the number of nonzeros in the various rows of the 3896 OFF-DIAGONAL portion of the local submatrix (possibly different for 3897 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3898 structure. The size of this array is equal to the number 3899 of local rows, i.e 'm'. 3900 3901 If the *_nnz parameter is given then the *_nz parameter is ignored 3902 3903 The AIJ format (also called the Yale sparse matrix format or 3904 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3905 storage. The stored row and column indices begin with zero. 3906 See Users-Manual: ch_mat for details. 3907 3908 The parallel matrix is partitioned such that the first m0 rows belong to 3909 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3910 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3911 3912 The DIAGONAL portion of the local submatrix of a processor can be defined 3913 as the submatrix which is obtained by extraction the part corresponding to 3914 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3915 first row that belongs to the processor, r2 is the last row belonging to 3916 the this processor, and c1-c2 is range of indices of the local part of a 3917 vector suitable for applying the matrix to. This is an mxn matrix. In the 3918 common case of a square matrix, the row and column ranges are the same and 3919 the DIAGONAL part is also square. The remaining portion of the local 3920 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3921 3922 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3923 3924 You can call MatGetInfo() to get information on how effective the preallocation was; 3925 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3926 You can also run with the option -info and look for messages with the string 3927 malloc in them to see if additional memory allocation was needed. 3928 3929 Example usage: 3930 3931 Consider the following 8x8 matrix with 34 non-zero values, that is 3932 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3933 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3934 as follows: 3935 3936 .vb 3937 1 2 0 | 0 3 0 | 0 4 3938 Proc0 0 5 6 | 7 0 0 | 8 0 3939 9 0 10 | 11 0 0 | 12 0 3940 ------------------------------------- 3941 13 0 14 | 15 16 17 | 0 0 3942 Proc1 0 18 0 | 19 20 21 | 0 0 3943 0 0 0 | 22 23 0 | 24 0 3944 ------------------------------------- 3945 Proc2 25 26 27 | 0 0 28 | 29 0 3946 30 0 0 | 31 32 33 | 0 34 3947 .ve 3948 3949 This can be represented as a collection of submatrices as: 3950 3951 .vb 3952 A B C 3953 D E F 3954 G H I 3955 .ve 3956 3957 Where the submatrices A,B,C are owned by proc0, D,E,F are 3958 owned by proc1, G,H,I are owned by proc2. 3959 3960 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3961 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3962 The 'M','N' parameters are 8,8, and have the same values on all procs. 3963 3964 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3965 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3966 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3967 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3968 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3969 matrix, ans [DF] as another SeqAIJ matrix. 3970 3971 When d_nz, o_nz parameters are specified, d_nz storage elements are 3972 allocated for every row of the local diagonal submatrix, and o_nz 3973 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3974 One way to choose d_nz and o_nz is to use the max nonzerors per local 3975 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3976 In this case, the values of d_nz,o_nz are: 3977 .vb 3978 proc0 : dnz = 2, o_nz = 2 3979 proc1 : dnz = 3, o_nz = 2 3980 proc2 : dnz = 1, o_nz = 4 3981 .ve 3982 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3983 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3984 for proc3. i.e we are using 12+15+10=37 storage locations to store 3985 34 values. 3986 3987 When d_nnz, o_nnz parameters are specified, the storage is specified 3988 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3989 In the above case the values for d_nnz,o_nnz are: 3990 .vb 3991 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3992 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3993 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3994 .ve 3995 Here the space allocated is sum of all the above values i.e 34, and 3996 hence pre-allocation is perfect. 3997 3998 Level: intermediate 3999 4000 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4001 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4002 @*/ 4003 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4004 { 4005 PetscErrorCode ierr; 4006 4007 PetscFunctionBegin; 4008 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4009 PetscValidType(B,1); 4010 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4011 PetscFunctionReturn(0); 4012 } 4013 4014 /*@ 4015 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4016 CSR format for the local rows. 4017 4018 Collective 4019 4020 Input Parameters: 4021 + comm - MPI communicator 4022 . m - number of local rows (Cannot be PETSC_DECIDE) 4023 . n - This value should be the same as the local size used in creating the 4024 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4025 calculated if N is given) For square matrices n is almost always m. 4026 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4027 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4028 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4029 . j - column indices 4030 - a - matrix values 4031 4032 Output Parameter: 4033 . mat - the matrix 4034 4035 Level: intermediate 4036 4037 Notes: 4038 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4039 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4040 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4041 4042 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4043 4044 The format which is used for the sparse matrix input, is equivalent to a 4045 row-major ordering.. i.e for the following matrix, the input data expected is 4046 as shown 4047 4048 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4049 4050 $ 1 0 0 4051 $ 2 0 3 P0 4052 $ ------- 4053 $ 4 5 6 P1 4054 $ 4055 $ Process0 [P0]: rows_owned=[0,1] 4056 $ i = {0,1,3} [size = nrow+1 = 2+1] 4057 $ j = {0,0,2} [size = 3] 4058 $ v = {1,2,3} [size = 3] 4059 $ 4060 $ Process1 [P1]: rows_owned=[2] 4061 $ i = {0,3} [size = nrow+1 = 1+1] 4062 $ j = {0,1,2} [size = 3] 4063 $ v = {4,5,6} [size = 3] 4064 4065 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4066 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4067 @*/ 4068 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4069 { 4070 PetscErrorCode ierr; 4071 4072 PetscFunctionBegin; 4073 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4074 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4075 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4076 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4077 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4078 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4079 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4080 PetscFunctionReturn(0); 4081 } 4082 4083 /*@ 4084 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4085 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4086 4087 Collective 4088 4089 Input Parameters: 4090 + mat - the matrix 4091 . m - number of local rows (Cannot be PETSC_DECIDE) 4092 . n - This value should be the same as the local size used in creating the 4093 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4094 calculated if N is given) For square matrices n is almost always m. 4095 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4096 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4097 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4098 . J - column indices 4099 - v - matrix values 4100 4101 Level: intermediate 4102 4103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4104 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4105 @*/ 4106 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4107 { 4108 PetscErrorCode ierr; 4109 PetscInt cstart,nnz,i,j; 4110 PetscInt *ld; 4111 PetscBool nooffprocentries; 4112 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4113 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4114 PetscScalar *ad = Ad->a, *ao = Ao->a; 4115 const PetscInt *Adi = Ad->i; 4116 PetscInt ldi,Iii,md; 4117 4118 PetscFunctionBegin; 4119 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4120 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4121 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4122 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4123 4124 cstart = mat->cmap->rstart; 4125 if (!Aij->ld) { 4126 /* count number of entries below block diagonal */ 4127 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4128 Aij->ld = ld; 4129 for (i=0; i<m; i++) { 4130 nnz = Ii[i+1]- Ii[i]; 4131 j = 0; 4132 while (J[j] < cstart && j < nnz) {j++;} 4133 J += nnz; 4134 ld[i] = j; 4135 } 4136 } else { 4137 ld = Aij->ld; 4138 } 4139 4140 for (i=0; i<m; i++) { 4141 nnz = Ii[i+1]- Ii[i]; 4142 Iii = Ii[i]; 4143 ldi = ld[i]; 4144 md = Adi[i+1]-Adi[i]; 4145 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4146 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4147 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4148 ad += md; 4149 ao += nnz - md; 4150 } 4151 nooffprocentries = mat->nooffprocentries; 4152 mat->nooffprocentries = PETSC_TRUE; 4153 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4154 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4155 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4156 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4157 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4158 mat->nooffprocentries = nooffprocentries; 4159 PetscFunctionReturn(0); 4160 } 4161 4162 /*@C 4163 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4164 (the default parallel PETSc format). For good matrix assembly performance 4165 the user should preallocate the matrix storage by setting the parameters 4166 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4167 performance can be increased by more than a factor of 50. 4168 4169 Collective 4170 4171 Input Parameters: 4172 + comm - MPI communicator 4173 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4174 This value should be the same as the local size used in creating the 4175 y vector for the matrix-vector product y = Ax. 4176 . n - This value should be the same as the local size used in creating the 4177 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4178 calculated if N is given) For square matrices n is almost always m. 4179 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4180 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4181 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4182 (same value is used for all local rows) 4183 . d_nnz - array containing the number of nonzeros in the various rows of the 4184 DIAGONAL portion of the local submatrix (possibly different for each row) 4185 or NULL, if d_nz is used to specify the nonzero structure. 4186 The size of this array is equal to the number of local rows, i.e 'm'. 4187 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4188 submatrix (same value is used for all local rows). 4189 - o_nnz - array containing the number of nonzeros in the various rows of the 4190 OFF-DIAGONAL portion of the local submatrix (possibly different for 4191 each row) or NULL, if o_nz is used to specify the nonzero 4192 structure. The size of this array is equal to the number 4193 of local rows, i.e 'm'. 4194 4195 Output Parameter: 4196 . A - the matrix 4197 4198 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4199 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4200 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4201 4202 Notes: 4203 If the *_nnz parameter is given then the *_nz parameter is ignored 4204 4205 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4206 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4207 storage requirements for this matrix. 4208 4209 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4210 processor than it must be used on all processors that share the object for 4211 that argument. 4212 4213 The user MUST specify either the local or global matrix dimensions 4214 (possibly both). 4215 4216 The parallel matrix is partitioned across processors such that the 4217 first m0 rows belong to process 0, the next m1 rows belong to 4218 process 1, the next m2 rows belong to process 2 etc.. where 4219 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4220 values corresponding to [m x N] submatrix. 4221 4222 The columns are logically partitioned with the n0 columns belonging 4223 to 0th partition, the next n1 columns belonging to the next 4224 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4225 4226 The DIAGONAL portion of the local submatrix on any given processor 4227 is the submatrix corresponding to the rows and columns m,n 4228 corresponding to the given processor. i.e diagonal matrix on 4229 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4230 etc. The remaining portion of the local submatrix [m x (N-n)] 4231 constitute the OFF-DIAGONAL portion. The example below better 4232 illustrates this concept. 4233 4234 For a square global matrix we define each processor's diagonal portion 4235 to be its local rows and the corresponding columns (a square submatrix); 4236 each processor's off-diagonal portion encompasses the remainder of the 4237 local matrix (a rectangular submatrix). 4238 4239 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4240 4241 When calling this routine with a single process communicator, a matrix of 4242 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4243 type of communicator, use the construction mechanism 4244 .vb 4245 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4246 .ve 4247 4248 $ MatCreate(...,&A); 4249 $ MatSetType(A,MATMPIAIJ); 4250 $ MatSetSizes(A, m,n,M,N); 4251 $ MatMPIAIJSetPreallocation(A,...); 4252 4253 By default, this format uses inodes (identical nodes) when possible. 4254 We search for consecutive rows with the same nonzero structure, thereby 4255 reusing matrix information to achieve increased efficiency. 4256 4257 Options Database Keys: 4258 + -mat_no_inode - Do not use inodes 4259 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4260 4261 4262 4263 Example usage: 4264 4265 Consider the following 8x8 matrix with 34 non-zero values, that is 4266 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4267 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4268 as follows 4269 4270 .vb 4271 1 2 0 | 0 3 0 | 0 4 4272 Proc0 0 5 6 | 7 0 0 | 8 0 4273 9 0 10 | 11 0 0 | 12 0 4274 ------------------------------------- 4275 13 0 14 | 15 16 17 | 0 0 4276 Proc1 0 18 0 | 19 20 21 | 0 0 4277 0 0 0 | 22 23 0 | 24 0 4278 ------------------------------------- 4279 Proc2 25 26 27 | 0 0 28 | 29 0 4280 30 0 0 | 31 32 33 | 0 34 4281 .ve 4282 4283 This can be represented as a collection of submatrices as 4284 4285 .vb 4286 A B C 4287 D E F 4288 G H I 4289 .ve 4290 4291 Where the submatrices A,B,C are owned by proc0, D,E,F are 4292 owned by proc1, G,H,I are owned by proc2. 4293 4294 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4295 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4296 The 'M','N' parameters are 8,8, and have the same values on all procs. 4297 4298 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4299 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4300 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4301 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4302 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4303 matrix, ans [DF] as another SeqAIJ matrix. 4304 4305 When d_nz, o_nz parameters are specified, d_nz storage elements are 4306 allocated for every row of the local diagonal submatrix, and o_nz 4307 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4308 One way to choose d_nz and o_nz is to use the max nonzerors per local 4309 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4310 In this case, the values of d_nz,o_nz are 4311 .vb 4312 proc0 : dnz = 2, o_nz = 2 4313 proc1 : dnz = 3, o_nz = 2 4314 proc2 : dnz = 1, o_nz = 4 4315 .ve 4316 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4317 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4318 for proc3. i.e we are using 12+15+10=37 storage locations to store 4319 34 values. 4320 4321 When d_nnz, o_nnz parameters are specified, the storage is specified 4322 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4323 In the above case the values for d_nnz,o_nnz are 4324 .vb 4325 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4326 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4327 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4328 .ve 4329 Here the space allocated is sum of all the above values i.e 34, and 4330 hence pre-allocation is perfect. 4331 4332 Level: intermediate 4333 4334 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4335 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4336 @*/ 4337 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4338 { 4339 PetscErrorCode ierr; 4340 PetscMPIInt size; 4341 4342 PetscFunctionBegin; 4343 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4344 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4345 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4346 if (size > 1) { 4347 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4348 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4349 } else { 4350 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4351 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4352 } 4353 PetscFunctionReturn(0); 4354 } 4355 4356 /*@C 4357 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4358 4359 Not collective 4360 4361 Input Parameter: 4362 . A - The MPIAIJ matrix 4363 4364 Output Parameters: 4365 + Ad - The local diagonal block as a SeqAIJ matrix 4366 . Ao - The local off-diagonal block as a SeqAIJ matrix 4367 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4368 4369 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4370 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4371 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4372 local column numbers to global column numbers in the original matrix. 4373 4374 Level: intermediate 4375 4376 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4377 @*/ 4378 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4379 { 4380 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4381 PetscBool flg; 4382 PetscErrorCode ierr; 4383 4384 PetscFunctionBegin; 4385 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4386 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4387 if (Ad) *Ad = a->A; 4388 if (Ao) *Ao = a->B; 4389 if (colmap) *colmap = a->garray; 4390 PetscFunctionReturn(0); 4391 } 4392 4393 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4394 { 4395 PetscErrorCode ierr; 4396 PetscInt m,N,i,rstart,nnz,Ii; 4397 PetscInt *indx; 4398 PetscScalar *values; 4399 4400 PetscFunctionBegin; 4401 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4402 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4403 PetscInt *dnz,*onz,sum,bs,cbs; 4404 4405 if (n == PETSC_DECIDE) { 4406 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4407 } 4408 /* Check sum(n) = N */ 4409 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4410 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4411 4412 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4413 rstart -= m; 4414 4415 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4416 for (i=0; i<m; i++) { 4417 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4418 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4419 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4420 } 4421 4422 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4423 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4424 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4425 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4426 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4427 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4428 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4429 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4430 } 4431 4432 /* numeric phase */ 4433 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4434 for (i=0; i<m; i++) { 4435 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4436 Ii = i + rstart; 4437 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4438 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4439 } 4440 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4441 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4442 PetscFunctionReturn(0); 4443 } 4444 4445 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4446 { 4447 PetscErrorCode ierr; 4448 PetscMPIInt rank; 4449 PetscInt m,N,i,rstart,nnz; 4450 size_t len; 4451 const PetscInt *indx; 4452 PetscViewer out; 4453 char *name; 4454 Mat B; 4455 const PetscScalar *values; 4456 4457 PetscFunctionBegin; 4458 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4459 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4460 /* Should this be the type of the diagonal block of A? */ 4461 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4462 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4463 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4464 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4465 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4466 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4467 for (i=0; i<m; i++) { 4468 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4469 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4470 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4471 } 4472 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4473 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4474 4475 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4476 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4477 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4478 sprintf(name,"%s.%d",outfile,rank); 4479 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4480 ierr = PetscFree(name);CHKERRQ(ierr); 4481 ierr = MatView(B,out);CHKERRQ(ierr); 4482 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4483 ierr = MatDestroy(&B);CHKERRQ(ierr); 4484 PetscFunctionReturn(0); 4485 } 4486 4487 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4488 { 4489 PetscErrorCode ierr; 4490 Mat_Merge_SeqsToMPI *merge; 4491 PetscContainer container; 4492 4493 PetscFunctionBegin; 4494 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4495 if (container) { 4496 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4505 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4506 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4507 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4508 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4509 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4510 ierr = PetscFree(merge);CHKERRQ(ierr); 4511 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4512 } 4513 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4514 PetscFunctionReturn(0); 4515 } 4516 4517 #include <../src/mat/utils/freespace.h> 4518 #include <petscbt.h> 4519 4520 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4521 { 4522 PetscErrorCode ierr; 4523 MPI_Comm comm; 4524 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4525 PetscMPIInt size,rank,taga,*len_s; 4526 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4527 PetscInt proc,m; 4528 PetscInt **buf_ri,**buf_rj; 4529 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4530 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4531 MPI_Request *s_waits,*r_waits; 4532 MPI_Status *status; 4533 MatScalar *aa=a->a; 4534 MatScalar **abuf_r,*ba_i; 4535 Mat_Merge_SeqsToMPI *merge; 4536 PetscContainer container; 4537 4538 PetscFunctionBegin; 4539 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4540 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4541 4542 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4543 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4544 4545 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4546 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4547 4548 bi = merge->bi; 4549 bj = merge->bj; 4550 buf_ri = merge->buf_ri; 4551 buf_rj = merge->buf_rj; 4552 4553 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4554 owners = merge->rowmap->range; 4555 len_s = merge->len_s; 4556 4557 /* send and recv matrix values */ 4558 /*-----------------------------*/ 4559 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4560 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4561 4562 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4563 for (proc=0,k=0; proc<size; proc++) { 4564 if (!len_s[proc]) continue; 4565 i = owners[proc]; 4566 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4567 k++; 4568 } 4569 4570 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4571 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4572 ierr = PetscFree(status);CHKERRQ(ierr); 4573 4574 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4575 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4576 4577 /* insert mat values of mpimat */ 4578 /*----------------------------*/ 4579 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4580 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4581 4582 for (k=0; k<merge->nrecv; k++) { 4583 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4584 nrows = *(buf_ri_k[k]); 4585 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4586 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4587 } 4588 4589 /* set values of ba */ 4590 m = merge->rowmap->n; 4591 for (i=0; i<m; i++) { 4592 arow = owners[rank] + i; 4593 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4594 bnzi = bi[i+1] - bi[i]; 4595 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4596 4597 /* add local non-zero vals of this proc's seqmat into ba */ 4598 anzi = ai[arow+1] - ai[arow]; 4599 aj = a->j + ai[arow]; 4600 aa = a->a + ai[arow]; 4601 nextaj = 0; 4602 for (j=0; nextaj<anzi; j++) { 4603 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4604 ba_i[j] += aa[nextaj++]; 4605 } 4606 } 4607 4608 /* add received vals into ba */ 4609 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4610 /* i-th row */ 4611 if (i == *nextrow[k]) { 4612 anzi = *(nextai[k]+1) - *nextai[k]; 4613 aj = buf_rj[k] + *(nextai[k]); 4614 aa = abuf_r[k] + *(nextai[k]); 4615 nextaj = 0; 4616 for (j=0; nextaj<anzi; j++) { 4617 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4618 ba_i[j] += aa[nextaj++]; 4619 } 4620 } 4621 nextrow[k]++; nextai[k]++; 4622 } 4623 } 4624 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4625 } 4626 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4627 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4628 4629 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4630 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4631 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4632 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4633 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4634 PetscFunctionReturn(0); 4635 } 4636 4637 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4638 { 4639 PetscErrorCode ierr; 4640 Mat B_mpi; 4641 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4642 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4643 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4644 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4645 PetscInt len,proc,*dnz,*onz,bs,cbs; 4646 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4647 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4648 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4649 MPI_Status *status; 4650 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4651 PetscBT lnkbt; 4652 Mat_Merge_SeqsToMPI *merge; 4653 PetscContainer container; 4654 4655 PetscFunctionBegin; 4656 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4657 4658 /* make sure it is a PETSc comm */ 4659 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4660 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4661 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4662 4663 ierr = PetscNew(&merge);CHKERRQ(ierr); 4664 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4665 4666 /* determine row ownership */ 4667 /*---------------------------------------------------------*/ 4668 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4669 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4670 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4671 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4672 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4673 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4674 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4675 4676 m = merge->rowmap->n; 4677 owners = merge->rowmap->range; 4678 4679 /* determine the number of messages to send, their lengths */ 4680 /*---------------------------------------------------------*/ 4681 len_s = merge->len_s; 4682 4683 len = 0; /* length of buf_si[] */ 4684 merge->nsend = 0; 4685 for (proc=0; proc<size; proc++) { 4686 len_si[proc] = 0; 4687 if (proc == rank) { 4688 len_s[proc] = 0; 4689 } else { 4690 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4691 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4692 } 4693 if (len_s[proc]) { 4694 merge->nsend++; 4695 nrows = 0; 4696 for (i=owners[proc]; i<owners[proc+1]; i++) { 4697 if (ai[i+1] > ai[i]) nrows++; 4698 } 4699 len_si[proc] = 2*(nrows+1); 4700 len += len_si[proc]; 4701 } 4702 } 4703 4704 /* determine the number and length of messages to receive for ij-structure */ 4705 /*-------------------------------------------------------------------------*/ 4706 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4707 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4708 4709 /* post the Irecv of j-structure */ 4710 /*-------------------------------*/ 4711 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4712 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4713 4714 /* post the Isend of j-structure */ 4715 /*--------------------------------*/ 4716 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4717 4718 for (proc=0, k=0; proc<size; proc++) { 4719 if (!len_s[proc]) continue; 4720 i = owners[proc]; 4721 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4722 k++; 4723 } 4724 4725 /* receives and sends of j-structure are complete */ 4726 /*------------------------------------------------*/ 4727 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4728 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4729 4730 /* send and recv i-structure */ 4731 /*---------------------------*/ 4732 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4733 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4734 4735 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4736 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4737 for (proc=0,k=0; proc<size; proc++) { 4738 if (!len_s[proc]) continue; 4739 /* form outgoing message for i-structure: 4740 buf_si[0]: nrows to be sent 4741 [1:nrows]: row index (global) 4742 [nrows+1:2*nrows+1]: i-structure index 4743 */ 4744 /*-------------------------------------------*/ 4745 nrows = len_si[proc]/2 - 1; 4746 buf_si_i = buf_si + nrows+1; 4747 buf_si[0] = nrows; 4748 buf_si_i[0] = 0; 4749 nrows = 0; 4750 for (i=owners[proc]; i<owners[proc+1]; i++) { 4751 anzi = ai[i+1] - ai[i]; 4752 if (anzi) { 4753 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4754 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4755 nrows++; 4756 } 4757 } 4758 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4759 k++; 4760 buf_si += len_si[proc]; 4761 } 4762 4763 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4764 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4765 4766 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4767 for (i=0; i<merge->nrecv; i++) { 4768 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4769 } 4770 4771 ierr = PetscFree(len_si);CHKERRQ(ierr); 4772 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4773 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4774 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4775 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4776 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4777 ierr = PetscFree(status);CHKERRQ(ierr); 4778 4779 /* compute a local seq matrix in each processor */ 4780 /*----------------------------------------------*/ 4781 /* allocate bi array and free space for accumulating nonzero column info */ 4782 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4783 bi[0] = 0; 4784 4785 /* create and initialize a linked list */ 4786 nlnk = N+1; 4787 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4788 4789 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4790 len = ai[owners[rank+1]] - ai[owners[rank]]; 4791 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4792 4793 current_space = free_space; 4794 4795 /* determine symbolic info for each local row */ 4796 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4797 4798 for (k=0; k<merge->nrecv; k++) { 4799 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4800 nrows = *buf_ri_k[k]; 4801 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4802 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4803 } 4804 4805 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4806 len = 0; 4807 for (i=0; i<m; i++) { 4808 bnzi = 0; 4809 /* add local non-zero cols of this proc's seqmat into lnk */ 4810 arow = owners[rank] + i; 4811 anzi = ai[arow+1] - ai[arow]; 4812 aj = a->j + ai[arow]; 4813 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4814 bnzi += nlnk; 4815 /* add received col data into lnk */ 4816 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4817 if (i == *nextrow[k]) { /* i-th row */ 4818 anzi = *(nextai[k]+1) - *nextai[k]; 4819 aj = buf_rj[k] + *nextai[k]; 4820 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4821 bnzi += nlnk; 4822 nextrow[k]++; nextai[k]++; 4823 } 4824 } 4825 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4826 4827 /* if free space is not available, make more free space */ 4828 if (current_space->local_remaining<bnzi) { 4829 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4830 nspacedouble++; 4831 } 4832 /* copy data into free space, then initialize lnk */ 4833 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4834 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4835 4836 current_space->array += bnzi; 4837 current_space->local_used += bnzi; 4838 current_space->local_remaining -= bnzi; 4839 4840 bi[i+1] = bi[i] + bnzi; 4841 } 4842 4843 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4844 4845 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4846 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4847 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4848 4849 /* create symbolic parallel matrix B_mpi */ 4850 /*---------------------------------------*/ 4851 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4852 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4853 if (n==PETSC_DECIDE) { 4854 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4855 } else { 4856 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4857 } 4858 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4859 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4860 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4861 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4862 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4863 4864 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4865 B_mpi->assembled = PETSC_FALSE; 4866 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4867 merge->bi = bi; 4868 merge->bj = bj; 4869 merge->buf_ri = buf_ri; 4870 merge->buf_rj = buf_rj; 4871 merge->coi = NULL; 4872 merge->coj = NULL; 4873 merge->owners_co = NULL; 4874 4875 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4876 4877 /* attach the supporting struct to B_mpi for reuse */ 4878 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4879 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4880 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4881 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4882 *mpimat = B_mpi; 4883 4884 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4885 PetscFunctionReturn(0); 4886 } 4887 4888 /*@C 4889 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4890 matrices from each processor 4891 4892 Collective 4893 4894 Input Parameters: 4895 + comm - the communicators the parallel matrix will live on 4896 . seqmat - the input sequential matrices 4897 . m - number of local rows (or PETSC_DECIDE) 4898 . n - number of local columns (or PETSC_DECIDE) 4899 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4900 4901 Output Parameter: 4902 . mpimat - the parallel matrix generated 4903 4904 Level: advanced 4905 4906 Notes: 4907 The dimensions of the sequential matrix in each processor MUST be the same. 4908 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4909 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4910 @*/ 4911 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4912 { 4913 PetscErrorCode ierr; 4914 PetscMPIInt size; 4915 4916 PetscFunctionBegin; 4917 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4918 if (size == 1) { 4919 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4920 if (scall == MAT_INITIAL_MATRIX) { 4921 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4922 } else { 4923 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4924 } 4925 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4926 PetscFunctionReturn(0); 4927 } 4928 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4929 if (scall == MAT_INITIAL_MATRIX) { 4930 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4931 } 4932 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4933 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4934 PetscFunctionReturn(0); 4935 } 4936 4937 /*@ 4938 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4939 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4940 with MatGetSize() 4941 4942 Not Collective 4943 4944 Input Parameters: 4945 + A - the matrix 4946 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4947 4948 Output Parameter: 4949 . A_loc - the local sequential matrix generated 4950 4951 Level: developer 4952 4953 Notes: 4954 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 4955 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 4956 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 4957 modify the values of the returned A_loc. 4958 4959 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4960 4961 @*/ 4962 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4963 { 4964 PetscErrorCode ierr; 4965 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4966 Mat_SeqAIJ *mat,*a,*b; 4967 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4968 MatScalar *aa,*ba,*cam; 4969 PetscScalar *ca; 4970 PetscMPIInt size; 4971 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4972 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4973 PetscBool match; 4974 4975 PetscFunctionBegin; 4976 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 4977 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4978 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4979 if (size == 1) { 4980 if (scall == MAT_INITIAL_MATRIX) { 4981 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 4982 *A_loc = mpimat->A; 4983 } else if (scall == MAT_REUSE_MATRIX) { 4984 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4985 } 4986 PetscFunctionReturn(0); 4987 } 4988 4989 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4990 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4991 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4992 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4993 aa = a->a; ba = b->a; 4994 if (scall == MAT_INITIAL_MATRIX) { 4995 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4996 ci[0] = 0; 4997 for (i=0; i<am; i++) { 4998 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4999 } 5000 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5001 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5002 k = 0; 5003 for (i=0; i<am; i++) { 5004 ncols_o = bi[i+1] - bi[i]; 5005 ncols_d = ai[i+1] - ai[i]; 5006 /* off-diagonal portion of A */ 5007 for (jo=0; jo<ncols_o; jo++) { 5008 col = cmap[*bj]; 5009 if (col >= cstart) break; 5010 cj[k] = col; bj++; 5011 ca[k++] = *ba++; 5012 } 5013 /* diagonal portion of A */ 5014 for (j=0; j<ncols_d; j++) { 5015 cj[k] = cstart + *aj++; 5016 ca[k++] = *aa++; 5017 } 5018 /* off-diagonal portion of A */ 5019 for (j=jo; j<ncols_o; j++) { 5020 cj[k] = cmap[*bj++]; 5021 ca[k++] = *ba++; 5022 } 5023 } 5024 /* put together the new matrix */ 5025 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5026 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5027 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5028 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5029 mat->free_a = PETSC_TRUE; 5030 mat->free_ij = PETSC_TRUE; 5031 mat->nonew = 0; 5032 } else if (scall == MAT_REUSE_MATRIX) { 5033 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5034 ci = mat->i; cj = mat->j; cam = mat->a; 5035 for (i=0; i<am; i++) { 5036 /* off-diagonal portion of A */ 5037 ncols_o = bi[i+1] - bi[i]; 5038 for (jo=0; jo<ncols_o; jo++) { 5039 col = cmap[*bj]; 5040 if (col >= cstart) break; 5041 *cam++ = *ba++; bj++; 5042 } 5043 /* diagonal portion of A */ 5044 ncols_d = ai[i+1] - ai[i]; 5045 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5046 /* off-diagonal portion of A */ 5047 for (j=jo; j<ncols_o; j++) { 5048 *cam++ = *ba++; bj++; 5049 } 5050 } 5051 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5052 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5053 PetscFunctionReturn(0); 5054 } 5055 5056 /*@C 5057 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5058 5059 Not Collective 5060 5061 Input Parameters: 5062 + A - the matrix 5063 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5064 - row, col - index sets of rows and columns to extract (or NULL) 5065 5066 Output Parameter: 5067 . A_loc - the local sequential matrix generated 5068 5069 Level: developer 5070 5071 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5072 5073 @*/ 5074 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5075 { 5076 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5077 PetscErrorCode ierr; 5078 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5079 IS isrowa,iscola; 5080 Mat *aloc; 5081 PetscBool match; 5082 5083 PetscFunctionBegin; 5084 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5085 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5086 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5087 if (!row) { 5088 start = A->rmap->rstart; end = A->rmap->rend; 5089 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5090 } else { 5091 isrowa = *row; 5092 } 5093 if (!col) { 5094 start = A->cmap->rstart; 5095 cmap = a->garray; 5096 nzA = a->A->cmap->n; 5097 nzB = a->B->cmap->n; 5098 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5099 ncols = 0; 5100 for (i=0; i<nzB; i++) { 5101 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5102 else break; 5103 } 5104 imark = i; 5105 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5106 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5107 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5108 } else { 5109 iscola = *col; 5110 } 5111 if (scall != MAT_INITIAL_MATRIX) { 5112 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5113 aloc[0] = *A_loc; 5114 } 5115 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5116 if (!col) { /* attach global id of condensed columns */ 5117 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5118 } 5119 *A_loc = aloc[0]; 5120 ierr = PetscFree(aloc);CHKERRQ(ierr); 5121 if (!row) { 5122 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5123 } 5124 if (!col) { 5125 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5126 } 5127 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5128 PetscFunctionReturn(0); 5129 } 5130 5131 /* 5132 * Destroy a mat that may be composed with PetscSF communication objects. 5133 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5134 * */ 5135 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5136 { 5137 PetscSF sf,osf; 5138 IS map; 5139 PetscErrorCode ierr; 5140 5141 PetscFunctionBegin; 5142 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5143 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5144 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5145 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5146 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5147 ierr = ISDestroy(&map);CHKERRQ(ierr); 5148 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5149 PetscFunctionReturn(0); 5150 } 5151 5152 /* 5153 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5154 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5155 * on a global size. 5156 * */ 5157 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5158 { 5159 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5160 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5161 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5162 PetscMPIInt owner; 5163 PetscSFNode *iremote,*oiremote; 5164 const PetscInt *lrowindices; 5165 PetscErrorCode ierr; 5166 PetscSF sf,osf; 5167 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5168 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5169 MPI_Comm comm; 5170 ISLocalToGlobalMapping mapping; 5171 5172 PetscFunctionBegin; 5173 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5174 /* plocalsize is the number of roots 5175 * nrows is the number of leaves 5176 * */ 5177 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5178 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5179 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5180 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5181 for (i=0;i<nrows;i++) { 5182 /* Find a remote index and an owner for a row 5183 * The row could be local or remote 5184 * */ 5185 owner = 0; 5186 lidx = 0; 5187 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5188 iremote[i].index = lidx; 5189 iremote[i].rank = owner; 5190 } 5191 /* Create SF to communicate how many nonzero columns for each row */ 5192 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5193 /* SF will figure out the number of nonzero colunms for each row, and their 5194 * offsets 5195 * */ 5196 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5197 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5198 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5199 5200 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5201 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5202 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5203 roffsets[0] = 0; 5204 roffsets[1] = 0; 5205 for (i=0;i<plocalsize;i++) { 5206 /* diag */ 5207 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5208 /* off diag */ 5209 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5210 /* compute offsets so that we relative location for each row */ 5211 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5212 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5213 } 5214 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5215 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5216 /* 'r' means root, and 'l' means leaf */ 5217 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5218 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5219 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5220 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5221 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5222 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5223 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5224 dntotalcols = 0; 5225 ontotalcols = 0; 5226 ncol = 0; 5227 for (i=0;i<nrows;i++) { 5228 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5229 ncol = PetscMax(pnnz[i],ncol); 5230 /* diag */ 5231 dntotalcols += nlcols[i*2+0]; 5232 /* off diag */ 5233 ontotalcols += nlcols[i*2+1]; 5234 } 5235 /* We do not need to figure the right number of columns 5236 * since all the calculations will be done by going through the raw data 5237 * */ 5238 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5239 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5240 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5241 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5242 /* diag */ 5243 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5244 /* off diag */ 5245 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5246 /* diag */ 5247 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5248 /* off diag */ 5249 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5250 dntotalcols = 0; 5251 ontotalcols = 0; 5252 ntotalcols = 0; 5253 for (i=0;i<nrows;i++) { 5254 owner = 0; 5255 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5256 /* Set iremote for diag matrix */ 5257 for (j=0;j<nlcols[i*2+0];j++) { 5258 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5259 iremote[dntotalcols].rank = owner; 5260 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5261 ilocal[dntotalcols++] = ntotalcols++; 5262 } 5263 /* off diag */ 5264 for (j=0;j<nlcols[i*2+1];j++) { 5265 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5266 oiremote[ontotalcols].rank = owner; 5267 oilocal[ontotalcols++] = ntotalcols++; 5268 } 5269 } 5270 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5271 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5272 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5273 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5274 /* P serves as roots and P_oth is leaves 5275 * Diag matrix 5276 * */ 5277 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5278 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5279 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5280 5281 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5282 /* Off diag */ 5283 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5284 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5285 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5286 /* We operate on the matrix internal data for saving memory */ 5287 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5288 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5289 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5290 /* Convert to global indices for diag matrix */ 5291 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5292 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5293 /* We want P_oth store global indices */ 5294 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5295 /* Use memory scalable approach */ 5296 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5297 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5298 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5299 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5300 /* Convert back to local indices */ 5301 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5302 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5303 nout = 0; 5304 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5305 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5306 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5307 /* Exchange values */ 5308 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5309 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5310 /* Stop PETSc from shrinking memory */ 5311 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5312 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5313 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5314 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5315 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5316 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5317 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5318 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5319 PetscFunctionReturn(0); 5320 } 5321 5322 /* 5323 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5324 * This supports MPIAIJ and MAIJ 5325 * */ 5326 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5327 { 5328 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5329 Mat_SeqAIJ *p_oth; 5330 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5331 IS rows,map; 5332 PetscHMapI hamp; 5333 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5334 MPI_Comm comm; 5335 PetscSF sf,osf; 5336 PetscBool has; 5337 PetscErrorCode ierr; 5338 5339 PetscFunctionBegin; 5340 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5341 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5342 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5343 * and then create a submatrix (that often is an overlapping matrix) 5344 * */ 5345 if (reuse==MAT_INITIAL_MATRIX) { 5346 /* Use a hash table to figure out unique keys */ 5347 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5348 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5349 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5350 count = 0; 5351 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5352 for (i=0;i<a->B->cmap->n;i++) { 5353 key = a->garray[i]/dof; 5354 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5355 if (!has) { 5356 mapping[i] = count; 5357 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5358 } else { 5359 /* Current 'i' has the same value the previous step */ 5360 mapping[i] = count-1; 5361 } 5362 } 5363 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5364 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5365 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5366 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5367 off = 0; 5368 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5369 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5370 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5371 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5372 /* In case, the matrix was already created but users want to recreate the matrix */ 5373 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5374 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5375 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5376 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5377 } else if (reuse==MAT_REUSE_MATRIX) { 5378 /* If matrix was already created, we simply update values using SF objects 5379 * that as attached to the matrix ealier. 5380 * */ 5381 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5382 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5383 if (!sf || !osf) { 5384 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5385 } 5386 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5387 /* Update values in place */ 5388 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5389 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5390 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5391 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5392 } else { 5393 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5394 } 5395 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5396 PetscFunctionReturn(0); 5397 } 5398 5399 /*@C 5400 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5401 5402 Collective on Mat 5403 5404 Input Parameters: 5405 + A,B - the matrices in mpiaij format 5406 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5407 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5408 5409 Output Parameter: 5410 + rowb, colb - index sets of rows and columns of B to extract 5411 - B_seq - the sequential matrix generated 5412 5413 Level: developer 5414 5415 @*/ 5416 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5417 { 5418 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5419 PetscErrorCode ierr; 5420 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5421 IS isrowb,iscolb; 5422 Mat *bseq=NULL; 5423 5424 PetscFunctionBegin; 5425 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5426 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5427 } 5428 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5429 5430 if (scall == MAT_INITIAL_MATRIX) { 5431 start = A->cmap->rstart; 5432 cmap = a->garray; 5433 nzA = a->A->cmap->n; 5434 nzB = a->B->cmap->n; 5435 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5436 ncols = 0; 5437 for (i=0; i<nzB; i++) { /* row < local row index */ 5438 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5439 else break; 5440 } 5441 imark = i; 5442 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5443 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5444 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5445 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5446 } else { 5447 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5448 isrowb = *rowb; iscolb = *colb; 5449 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5450 bseq[0] = *B_seq; 5451 } 5452 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5453 *B_seq = bseq[0]; 5454 ierr = PetscFree(bseq);CHKERRQ(ierr); 5455 if (!rowb) { 5456 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5457 } else { 5458 *rowb = isrowb; 5459 } 5460 if (!colb) { 5461 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5462 } else { 5463 *colb = iscolb; 5464 } 5465 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5466 PetscFunctionReturn(0); 5467 } 5468 5469 /* 5470 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5471 of the OFF-DIAGONAL portion of local A 5472 5473 Collective on Mat 5474 5475 Input Parameters: 5476 + A,B - the matrices in mpiaij format 5477 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5478 5479 Output Parameter: 5480 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5481 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5482 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5483 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5484 5485 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5486 for this matrix. This is not desirable.. 5487 5488 Level: developer 5489 5490 */ 5491 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5492 { 5493 PetscErrorCode ierr; 5494 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5495 Mat_SeqAIJ *b_oth; 5496 VecScatter ctx; 5497 MPI_Comm comm; 5498 const PetscMPIInt *rprocs,*sprocs; 5499 const PetscInt *srow,*rstarts,*sstarts; 5500 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5501 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5502 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5503 MPI_Request *rwaits = NULL,*swaits = NULL; 5504 MPI_Status rstatus; 5505 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5506 5507 PetscFunctionBegin; 5508 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5509 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5510 5511 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5512 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5513 } 5514 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5515 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5516 5517 if (size == 1) { 5518 startsj_s = NULL; 5519 bufa_ptr = NULL; 5520 *B_oth = NULL; 5521 PetscFunctionReturn(0); 5522 } 5523 5524 ctx = a->Mvctx; 5525 tag = ((PetscObject)ctx)->tag; 5526 5527 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5528 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5529 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5530 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5531 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5532 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5533 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5534 5535 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5536 if (scall == MAT_INITIAL_MATRIX) { 5537 /* i-array */ 5538 /*---------*/ 5539 /* post receives */ 5540 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5541 for (i=0; i<nrecvs; i++) { 5542 rowlen = rvalues + rstarts[i]*rbs; 5543 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5544 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5545 } 5546 5547 /* pack the outgoing message */ 5548 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5549 5550 sstartsj[0] = 0; 5551 rstartsj[0] = 0; 5552 len = 0; /* total length of j or a array to be sent */ 5553 if (nsends) { 5554 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5555 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5556 } 5557 for (i=0; i<nsends; i++) { 5558 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5559 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5560 for (j=0; j<nrows; j++) { 5561 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5562 for (l=0; l<sbs; l++) { 5563 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5564 5565 rowlen[j*sbs+l] = ncols; 5566 5567 len += ncols; 5568 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5569 } 5570 k++; 5571 } 5572 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5573 5574 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5575 } 5576 /* recvs and sends of i-array are completed */ 5577 i = nrecvs; 5578 while (i--) { 5579 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5580 } 5581 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5582 ierr = PetscFree(svalues);CHKERRQ(ierr); 5583 5584 /* allocate buffers for sending j and a arrays */ 5585 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5586 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5587 5588 /* create i-array of B_oth */ 5589 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5590 5591 b_othi[0] = 0; 5592 len = 0; /* total length of j or a array to be received */ 5593 k = 0; 5594 for (i=0; i<nrecvs; i++) { 5595 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5596 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5597 for (j=0; j<nrows; j++) { 5598 b_othi[k+1] = b_othi[k] + rowlen[j]; 5599 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5600 k++; 5601 } 5602 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5603 } 5604 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5605 5606 /* allocate space for j and a arrrays of B_oth */ 5607 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5608 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5609 5610 /* j-array */ 5611 /*---------*/ 5612 /* post receives of j-array */ 5613 for (i=0; i<nrecvs; i++) { 5614 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5615 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5616 } 5617 5618 /* pack the outgoing message j-array */ 5619 if (nsends) k = sstarts[0]; 5620 for (i=0; i<nsends; i++) { 5621 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5622 bufJ = bufj+sstartsj[i]; 5623 for (j=0; j<nrows; j++) { 5624 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5625 for (ll=0; ll<sbs; ll++) { 5626 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5627 for (l=0; l<ncols; l++) { 5628 *bufJ++ = cols[l]; 5629 } 5630 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5631 } 5632 } 5633 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5634 } 5635 5636 /* recvs and sends of j-array are completed */ 5637 i = nrecvs; 5638 while (i--) { 5639 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5640 } 5641 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5642 } else if (scall == MAT_REUSE_MATRIX) { 5643 sstartsj = *startsj_s; 5644 rstartsj = *startsj_r; 5645 bufa = *bufa_ptr; 5646 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5647 b_otha = b_oth->a; 5648 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5649 5650 /* a-array */ 5651 /*---------*/ 5652 /* post receives of a-array */ 5653 for (i=0; i<nrecvs; i++) { 5654 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5655 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5656 } 5657 5658 /* pack the outgoing message a-array */ 5659 if (nsends) k = sstarts[0]; 5660 for (i=0; i<nsends; i++) { 5661 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5662 bufA = bufa+sstartsj[i]; 5663 for (j=0; j<nrows; j++) { 5664 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5665 for (ll=0; ll<sbs; ll++) { 5666 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5667 for (l=0; l<ncols; l++) { 5668 *bufA++ = vals[l]; 5669 } 5670 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5671 } 5672 } 5673 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5674 } 5675 /* recvs and sends of a-array are completed */ 5676 i = nrecvs; 5677 while (i--) { 5678 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5679 } 5680 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5681 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5682 5683 if (scall == MAT_INITIAL_MATRIX) { 5684 /* put together the new matrix */ 5685 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5686 5687 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5688 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5689 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5690 b_oth->free_a = PETSC_TRUE; 5691 b_oth->free_ij = PETSC_TRUE; 5692 b_oth->nonew = 0; 5693 5694 ierr = PetscFree(bufj);CHKERRQ(ierr); 5695 if (!startsj_s || !bufa_ptr) { 5696 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5697 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5698 } else { 5699 *startsj_s = sstartsj; 5700 *startsj_r = rstartsj; 5701 *bufa_ptr = bufa; 5702 } 5703 } 5704 5705 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5706 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5707 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5708 PetscFunctionReturn(0); 5709 } 5710 5711 /*@C 5712 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5713 5714 Not Collective 5715 5716 Input Parameters: 5717 . A - The matrix in mpiaij format 5718 5719 Output Parameter: 5720 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5721 . colmap - A map from global column index to local index into lvec 5722 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5723 5724 Level: developer 5725 5726 @*/ 5727 #if defined(PETSC_USE_CTABLE) 5728 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5729 #else 5730 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5731 #endif 5732 { 5733 Mat_MPIAIJ *a; 5734 5735 PetscFunctionBegin; 5736 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5737 PetscValidPointer(lvec, 2); 5738 PetscValidPointer(colmap, 3); 5739 PetscValidPointer(multScatter, 4); 5740 a = (Mat_MPIAIJ*) A->data; 5741 if (lvec) *lvec = a->lvec; 5742 if (colmap) *colmap = a->colmap; 5743 if (multScatter) *multScatter = a->Mvctx; 5744 PetscFunctionReturn(0); 5745 } 5746 5747 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5748 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5749 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5750 #if defined(PETSC_HAVE_MKL_SPARSE) 5751 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5752 #endif 5753 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5754 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5755 #if defined(PETSC_HAVE_ELEMENTAL) 5756 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5757 #endif 5758 #if defined(PETSC_HAVE_HYPRE) 5759 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5760 #endif 5761 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5762 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5763 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5764 5765 /* 5766 Computes (B'*A')' since computing B*A directly is untenable 5767 5768 n p p 5769 ( ) ( ) ( ) 5770 m ( A ) * n ( B ) = m ( C ) 5771 ( ) ( ) ( ) 5772 5773 */ 5774 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5775 { 5776 PetscErrorCode ierr; 5777 Mat At,Bt,Ct; 5778 5779 PetscFunctionBegin; 5780 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5781 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5782 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5783 ierr = MatDestroy(&At);CHKERRQ(ierr); 5784 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5785 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5786 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5787 PetscFunctionReturn(0); 5788 } 5789 5790 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5791 { 5792 PetscErrorCode ierr; 5793 PetscInt m=A->rmap->n,n=B->cmap->n; 5794 5795 PetscFunctionBegin; 5796 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5797 ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5798 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5799 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 5800 ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr); 5801 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5802 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5803 5804 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5805 PetscFunctionReturn(0); 5806 } 5807 5808 /* ----------------------------------------------------------------*/ 5809 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5810 { 5811 Mat_Product *product = C->product; 5812 Mat A = product->A,B=product->B; 5813 5814 PetscFunctionBegin; 5815 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5816 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5817 5818 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5819 C->ops->productsymbolic = MatProductSymbolic_AB; 5820 PetscFunctionReturn(0); 5821 } 5822 5823 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5824 { 5825 PetscErrorCode ierr; 5826 Mat_Product *product = C->product; 5827 5828 PetscFunctionBegin; 5829 if (product->type == MATPRODUCT_AB) { 5830 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 5831 } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIAIJ matrices",MatProductTypes[product->type]); 5832 PetscFunctionReturn(0); 5833 } 5834 /* ----------------------------------------------------------------*/ 5835 5836 /*MC 5837 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5838 5839 Options Database Keys: 5840 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5841 5842 Level: beginner 5843 5844 Notes: 5845 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 5846 in this case the values associated with the rows and columns one passes in are set to zero 5847 in the matrix 5848 5849 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 5850 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 5851 5852 .seealso: MatCreateAIJ() 5853 M*/ 5854 5855 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5856 { 5857 Mat_MPIAIJ *b; 5858 PetscErrorCode ierr; 5859 PetscMPIInt size; 5860 5861 PetscFunctionBegin; 5862 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5863 5864 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5865 B->data = (void*)b; 5866 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5867 B->assembled = PETSC_FALSE; 5868 B->insertmode = NOT_SET_VALUES; 5869 b->size = size; 5870 5871 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5872 5873 /* build cache for off array entries formed */ 5874 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5875 5876 b->donotstash = PETSC_FALSE; 5877 b->colmap = 0; 5878 b->garray = 0; 5879 b->roworiented = PETSC_TRUE; 5880 5881 /* stuff used for matrix vector multiply */ 5882 b->lvec = NULL; 5883 b->Mvctx = NULL; 5884 5885 /* stuff for MatGetRow() */ 5886 b->rowindices = 0; 5887 b->rowvalues = 0; 5888 b->getrowactive = PETSC_FALSE; 5889 5890 /* flexible pointer used in CUSP/CUSPARSE classes */ 5891 b->spptr = NULL; 5892 5893 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5894 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5895 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5896 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5897 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5898 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5899 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5900 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5901 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5902 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5903 #if defined(PETSC_HAVE_MKL_SPARSE) 5904 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5905 #endif 5906 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5907 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 5908 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5909 #if defined(PETSC_HAVE_ELEMENTAL) 5910 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5911 #endif 5912 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5913 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5914 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5915 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5916 #if defined(PETSC_HAVE_HYPRE) 5917 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5918 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5919 #endif 5920 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 5921 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 5922 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5923 PetscFunctionReturn(0); 5924 } 5925 5926 /*@C 5927 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5928 and "off-diagonal" part of the matrix in CSR format. 5929 5930 Collective 5931 5932 Input Parameters: 5933 + comm - MPI communicator 5934 . m - number of local rows (Cannot be PETSC_DECIDE) 5935 . n - This value should be the same as the local size used in creating the 5936 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5937 calculated if N is given) For square matrices n is almost always m. 5938 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5939 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5940 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5941 . j - column indices 5942 . a - matrix values 5943 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5944 . oj - column indices 5945 - oa - matrix values 5946 5947 Output Parameter: 5948 . mat - the matrix 5949 5950 Level: advanced 5951 5952 Notes: 5953 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5954 must free the arrays once the matrix has been destroyed and not before. 5955 5956 The i and j indices are 0 based 5957 5958 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5959 5960 This sets local rows and cannot be used to set off-processor values. 5961 5962 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5963 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5964 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5965 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5966 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5967 communication if it is known that only local entries will be set. 5968 5969 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5970 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5971 @*/ 5972 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5973 { 5974 PetscErrorCode ierr; 5975 Mat_MPIAIJ *maij; 5976 5977 PetscFunctionBegin; 5978 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5979 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5980 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5981 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5982 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5983 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5984 maij = (Mat_MPIAIJ*) (*mat)->data; 5985 5986 (*mat)->preallocated = PETSC_TRUE; 5987 5988 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5989 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5990 5991 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5992 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5993 5994 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5995 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5996 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5997 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5998 5999 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6000 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6001 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6002 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6003 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6004 PetscFunctionReturn(0); 6005 } 6006 6007 /* 6008 Special version for direct calls from Fortran 6009 */ 6010 #include <petsc/private/fortranimpl.h> 6011 6012 /* Change these macros so can be used in void function */ 6013 #undef CHKERRQ 6014 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6015 #undef SETERRQ2 6016 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6017 #undef SETERRQ3 6018 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6019 #undef SETERRQ 6020 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6021 6022 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6023 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6024 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6025 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6026 #else 6027 #endif 6028 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6029 { 6030 Mat mat = *mmat; 6031 PetscInt m = *mm, n = *mn; 6032 InsertMode addv = *maddv; 6033 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6034 PetscScalar value; 6035 PetscErrorCode ierr; 6036 6037 MatCheckPreallocated(mat,1); 6038 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6039 6040 #if defined(PETSC_USE_DEBUG) 6041 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6042 #endif 6043 { 6044 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6045 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6046 PetscBool roworiented = aij->roworiented; 6047 6048 /* Some Variables required in the macro */ 6049 Mat A = aij->A; 6050 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6051 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6052 MatScalar *aa = a->a; 6053 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6054 Mat B = aij->B; 6055 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6056 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6057 MatScalar *ba = b->a; 6058 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6059 * cannot use "#if defined" inside a macro. */ 6060 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6061 6062 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6063 PetscInt nonew = a->nonew; 6064 MatScalar *ap1,*ap2; 6065 6066 PetscFunctionBegin; 6067 for (i=0; i<m; i++) { 6068 if (im[i] < 0) continue; 6069 #if defined(PETSC_USE_DEBUG) 6070 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6071 #endif 6072 if (im[i] >= rstart && im[i] < rend) { 6073 row = im[i] - rstart; 6074 lastcol1 = -1; 6075 rp1 = aj + ai[row]; 6076 ap1 = aa + ai[row]; 6077 rmax1 = aimax[row]; 6078 nrow1 = ailen[row]; 6079 low1 = 0; 6080 high1 = nrow1; 6081 lastcol2 = -1; 6082 rp2 = bj + bi[row]; 6083 ap2 = ba + bi[row]; 6084 rmax2 = bimax[row]; 6085 nrow2 = bilen[row]; 6086 low2 = 0; 6087 high2 = nrow2; 6088 6089 for (j=0; j<n; j++) { 6090 if (roworiented) value = v[i*n+j]; 6091 else value = v[i+j*m]; 6092 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6093 if (in[j] >= cstart && in[j] < cend) { 6094 col = in[j] - cstart; 6095 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6096 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6097 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6098 #endif 6099 } else if (in[j] < 0) continue; 6100 #if defined(PETSC_USE_DEBUG) 6101 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6102 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 6103 #endif 6104 else { 6105 if (mat->was_assembled) { 6106 if (!aij->colmap) { 6107 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6108 } 6109 #if defined(PETSC_USE_CTABLE) 6110 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6111 col--; 6112 #else 6113 col = aij->colmap[in[j]] - 1; 6114 #endif 6115 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6116 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6117 col = in[j]; 6118 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6119 B = aij->B; 6120 b = (Mat_SeqAIJ*)B->data; 6121 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6122 rp2 = bj + bi[row]; 6123 ap2 = ba + bi[row]; 6124 rmax2 = bimax[row]; 6125 nrow2 = bilen[row]; 6126 low2 = 0; 6127 high2 = nrow2; 6128 bm = aij->B->rmap->n; 6129 ba = b->a; 6130 inserted = PETSC_FALSE; 6131 } 6132 } else col = in[j]; 6133 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6134 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6135 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6136 #endif 6137 } 6138 } 6139 } else if (!aij->donotstash) { 6140 if (roworiented) { 6141 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6142 } else { 6143 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6144 } 6145 } 6146 } 6147 } 6148 PetscFunctionReturnVoid(); 6149 } 6150